Merge branch 'main' into litellm_custom_pricing_ui_fix

This commit is contained in:
Krish Dholakia 2024-05-27 18:35:47 -07:00 committed by GitHub
commit dec482031b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 638 additions and 859 deletions


@@ -11,7 +11,7 @@ Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are Supported on Clarifai.
To obtain your Clarifai Personal access token follow this [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/). Optionally the PAT can also be passed in `completion` function.
```python
-os.environ["CALRIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"  # CLARIFAI_PAT
+os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"  # CLARIFAI_PAT
```
## Usage
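For reference, a minimal sketch of how the corrected environment variable is consumed; the model id below is illustrative and not part of this diff (any model from the Clarifai provider docs works the same way):

```python
import os
import litellm

# Assumes a valid Clarifai PAT; the env var name matches the fix above.
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"

response = litellm.completion(
    model="clarifai/mistralai.completion.mistral-7B-Instruct",  # illustrative model id
    messages=[{"role": "user", "content": "Hello from LiteLLM"}],
)
print(response.choices[0].message.content)
```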


@@ -21,6 +21,7 @@ Features:
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
- ✅ Audit Logs for `Created At, Created By` when Models Added
## Content Moderation


@@ -9,12 +9,3 @@ Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
[![Chat on WhatsApp](https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square)](https://wa.link/huol9n) [![Chat on Discord](https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square)](https://discord.gg/wuPM9dRgDw)
## Stable Version
If you're running into problems with installation / Usage
Use the stable version of litellm
```shell
pip install litellm==0.1.819
```


@@ -766,7 +766,12 @@ from .llms.bedrock import (
    AmazonMistralConfig,
    AmazonBedrockGlobalConfig,
)
-from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig, MistralConfig
+from .llms.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
DeepInfraConfig,
)
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .llms.watsonx import IBMWatsonXAIConfig
from .main import *  # type: ignore


@@ -157,6 +157,101 @@ class MistralConfig:
                )
            if param == "seed":
                optional_params["extra_body"] = {"random_seed": value}
if param == "response_format":
optional_params["response_format"] = value
return optional_params
class DeepInfraConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api
    The class `DeepInfraConfig` provides configuration for DeepInfra's Chat Completions API interface. Below are the parameters:
"""
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"frequency_penalty",
"function_call",
"functions",
"logit_bias",
"max_tokens",
"n",
"presence_penalty",
"stop",
"temperature",
"top_p",
"response_format",
"tools",
"tool_choice",
]
def map_openai_params(
self, non_default_params: dict, optional_params: dict, model: str
):
supported_openai_params = self.get_supported_openai_params()
for param, value in non_default_params.items():
if (
param == "temperature"
and value == 0
and model == "mistralai/Mistral-7B-Instruct-v0.1"
            ):  # this model does not support temperature == 0
value = 0.0001 # close to 0
if param in supported_openai_params:
optional_params[param] = value
        return optional_params
@@ -197,6 +292,7 @@ class OpenAIConfig:
    stop: Optional[Union[str, list]] = None
    temperature: Optional[int] = None
    top_p: Optional[int] = None
response_format: Optional[dict] = None
    def __init__(
        self,
@@ -210,6 +306,7 @@ class OpenAIConfig:
        stop: Optional[Union[str, list]] = None,
        temperature: Optional[int] = None,
        top_p: Optional[int] = None,
response_format: Optional[dict] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
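Since `DeepInfraConfig` is new in this commit, a minimal sketch of the parameter mapping it performs (values chosen to exercise the temperature special case added above):

```python
import litellm

config = litellm.DeepInfraConfig()

# temperature == 0 is bumped to 0.0001 for mistralai/Mistral-7B-Instruct-v0.1,
# per the special case in map_openai_params above.
mapped = config.map_openai_params(
    non_default_params={"temperature": 0, "max_tokens": 256},
    optional_params={},
    model="mistralai/Mistral-7B-Instruct-v0.1",
)
print(mapped)  # expected: {'temperature': 0.0001, 'max_tokens': 256}
```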

File diff suppressed because one or more lines are too long


@@ -1 +1 @@
[Minified Next.js HTML build output for the LiteLLM Dashboard, old and new versions. The only differences are the page chunk script (page-e266cb0126026d40.js → page-76d278f96a0e9768.js) and the build id (dYIEEO-62OCgyckEhgBd- → D_ZUmMtLMPSa4aQQUJtKt); the remaining markup is unchanged.]


@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-e266cb0126026d40.js"],""]
+3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
[Serialized RSC payload (line "0:"): identical in both versions except for the build id, "dYIEEO-62OCgyckEhgBd-" → "D_ZUmMtLMPSa4aQQUJtKt".]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@@ -984,10 +984,6 @@ class LiteLLM_VerificationToken(LiteLLMBase):
    org_id: Optional[str] = None  # org id for a given key
# hidden params used for parallel request limiting, not required to create a token
user_id_rate_limits: Optional[dict] = None
team_id_rate_limits: Optional[dict] = None
    class Config:
        protected_namespaces = ()


@@ -164,8 +164,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
        # check if REQUEST ALLOWED for user_id
        user_id = user_api_key_dict.user_id
        if user_id is not None:
-            _user_id_rate_limits = user_api_key_dict.user_id_rate_limits
+            _user_id_rate_limits = await self.user_api_key_cache.async_get_cache(
key=user_id
)
# get user tpm/rpm limits
            if _user_id_rate_limits is not None and isinstance(
                _user_id_rate_limits, dict
@@ -196,13 +197,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
        ## get team tpm/rpm limits
        team_id = user_api_key_dict.team_id
        if team_id is not None:
-            team_tpm_limit = getattr(user_api_key_dict, "team_tpm_limit", sys.maxsize)
-            if team_tpm_limit is None:
-                team_tpm_limit = sys.maxsize
-            team_rpm_limit = getattr(user_api_key_dict, "team_rpm_limit", sys.maxsize)
-            if team_rpm_limit is None:
-                team_rpm_limit = sys.maxsize
+            team_tpm_limit = user_api_key_dict.team_tpm_limit
+            team_rpm_limit = user_api_key_dict.team_rpm_limit
            if team_tpm_limit is None:
                team_tpm_limit = sys.maxsize
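Note that the user's limits are now read from the shared `user_api_key_cache` rather than from the request's auth object. A small sketch of the cache shape the hook expects, mirroring the updated test further below (the user id is illustrative):

```python
from litellm.caching import DualCache

user_api_key_cache = DualCache()

# The limiter calls async_get_cache(key=user_id) and expects a dict with tpm/rpm limits.
user_api_key_cache.set_cache(
    key="test-user",  # illustrative user id
    value={"tpm_limit": 9, "rpm_limit": 10},
)
```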


@@ -1,379 +0,0 @@
# What is this?
## Checks TPM/RPM Limits for a key/user/team on the proxy
## Works with Redis - if given
from typing import Optional, Literal
import litellm, traceback, sys
from litellm.caching import DualCache, RedisCache
from litellm.proxy._types import (
UserAPIKeyAuth,
LiteLLM_VerificationTokenView,
LiteLLM_UserTable,
LiteLLM_TeamTable,
)
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm import ModelResponse
from datetime import datetime
class _PROXY_MaxTPMRPMLimiter(CustomLogger):
user_api_key_cache = None
# Class variables or attributes
def __init__(self, internal_cache: Optional[DualCache]):
if internal_cache is None:
self.internal_cache = DualCache()
else:
self.internal_cache = internal_cache
def print_verbose(self, print_statement):
try:
verbose_proxy_logger.debug(print_statement)
if litellm.set_verbose:
print(print_statement) # noqa
except:
pass
## check if admin has set tpm/rpm limits for this key/user/team
def _check_limits_set(
self,
user_api_key_cache: DualCache,
key: Optional[str],
user_id: Optional[str],
team_id: Optional[str],
) -> bool:
## key
if key is not None:
key_val = user_api_key_cache.get_cache(key=key)
if isinstance(key_val, dict):
key_val = LiteLLM_VerificationTokenView(**key_val)
if isinstance(key_val, LiteLLM_VerificationTokenView):
user_api_key_tpm_limit = key_val.tpm_limit
user_api_key_rpm_limit = key_val.rpm_limit
if (
user_api_key_tpm_limit is not None
or user_api_key_rpm_limit is not None
):
return True
## team
if team_id is not None:
team_val = user_api_key_cache.get_cache(key=team_id)
if isinstance(team_val, dict):
team_val = LiteLLM_TeamTable(**team_val)
if isinstance(team_val, LiteLLM_TeamTable):
team_tpm_limit = team_val.tpm_limit
team_rpm_limit = team_val.rpm_limit
if team_tpm_limit is not None or team_rpm_limit is not None:
return True
## user
if user_id is not None:
user_val = user_api_key_cache.get_cache(key=user_id)
if isinstance(user_val, dict):
user_val = LiteLLM_UserTable(**user_val)
if isinstance(user_val, LiteLLM_UserTable):
user_tpm_limit = user_val.tpm_limit
user_rpm_limit = user_val.rpm_limit
if user_tpm_limit is not None or user_rpm_limit is not None:
return True
return False
async def check_key_in_limits(
self,
user_api_key_dict: UserAPIKeyAuth,
current_minute_dict: dict,
tpm_limit: int,
rpm_limit: int,
request_count_api_key: str,
type: Literal["key", "user", "team"],
):
if type == "key" and user_api_key_dict.api_key is not None:
current = current_minute_dict["key"].get(user_api_key_dict.api_key, None)
elif type == "user" and user_api_key_dict.user_id is not None:
current = current_minute_dict["user"].get(user_api_key_dict.user_id, None)
elif type == "team" and user_api_key_dict.team_id is not None:
current = current_minute_dict["team"].get(user_api_key_dict.team_id, None)
else:
return
if current is None:
if tpm_limit == 0 or rpm_limit == 0:
# base case
raise HTTPException(
status_code=429, detail="Max tpm/rpm limit reached."
)
elif current["current_tpm"] < tpm_limit and current["current_rpm"] < rpm_limit:
pass
else:
raise HTTPException(status_code=429, detail="Max tpm/rpm limit reached.")
async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: str,
):
self.print_verbose(
f"Inside Max TPM/RPM Limiter Pre-Call Hook - {user_api_key_dict}"
)
api_key = user_api_key_dict.api_key
# check if REQUEST ALLOWED for user_id
user_id = user_api_key_dict.user_id
## get team tpm/rpm limits
team_id = user_api_key_dict.team_id
self.user_api_key_cache = cache
_set_limits = self._check_limits_set(
user_api_key_cache=cache, key=api_key, user_id=user_id, team_id=team_id
)
self.print_verbose(f"_set_limits: {_set_limits}")
if _set_limits == False:
return
# ------------
# Setup values
# ------------
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
precise_minute = f"{current_date}-{current_hour}-{current_minute}"
cache_key = "usage:{}".format(precise_minute)
current_minute_dict = await self.internal_cache.async_get_cache(
key=cache_key
) # {"usage:{curr_minute}": {"key": {<api_key>: {"current_requests": 1, "current_tpm": 1, "current_rpm": 10}}}}
if current_minute_dict is None:
current_minute_dict = {"key": {}, "user": {}, "team": {}}
if api_key is not None:
tpm_limit = getattr(user_api_key_dict, "tpm_limit", sys.maxsize)
if tpm_limit is None:
tpm_limit = sys.maxsize
rpm_limit = getattr(user_api_key_dict, "rpm_limit", sys.maxsize)
if rpm_limit is None:
rpm_limit = sys.maxsize
request_count_api_key = f"{api_key}::{precise_minute}::request_count"
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=tpm_limit,
rpm_limit=rpm_limit,
type="key",
)
if user_id is not None:
_user_id_rate_limits = user_api_key_dict.user_id_rate_limits
# get user tpm/rpm limits
if _user_id_rate_limits is not None and isinstance(
_user_id_rate_limits, dict
):
user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
if user_tpm_limit is None:
user_tpm_limit = sys.maxsize
if user_rpm_limit is None:
user_rpm_limit = sys.maxsize
# now do the same tpm/rpm checks
request_count_api_key = f"{user_id}::{precise_minute}::request_count"
# print(f"Checking if {request_count_api_key} is allowed to make request for minute {precise_minute}")
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=user_tpm_limit,
rpm_limit=user_rpm_limit,
type="user",
)
# TEAM RATE LIMITS
if team_id is not None:
team_tpm_limit = getattr(user_api_key_dict, "team_tpm_limit", sys.maxsize)
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
team_rpm_limit = getattr(user_api_key_dict, "team_rpm_limit", sys.maxsize)
if team_rpm_limit is None:
team_rpm_limit = sys.maxsize
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
if team_rpm_limit is None:
team_rpm_limit = sys.maxsize
# now do the same tpm/rpm checks
request_count_api_key = f"{team_id}::{precise_minute}::request_count"
# print(f"Checking if {request_count_api_key} is allowed to make request for minute {precise_minute}")
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=team_tpm_limit,
rpm_limit=team_rpm_limit,
type="team",
)
return
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
self.print_verbose(f"INSIDE TPM RPM Limiter ASYNC SUCCESS LOGGING")
user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"]
user_api_key_user_id = kwargs["litellm_params"]["metadata"].get(
"user_api_key_user_id", None
)
user_api_key_team_id = kwargs["litellm_params"]["metadata"].get(
"user_api_key_team_id", None
)
_limits_set = self._check_limits_set(
user_api_key_cache=self.user_api_key_cache,
key=user_api_key,
user_id=user_api_key_user_id,
team_id=user_api_key_team_id,
)
if _limits_set == False: # don't waste cache calls if no tpm/rpm limits set
return
# ------------
# Setup values
# ------------
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
precise_minute = f"{current_date}-{current_hour}-{current_minute}"
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
"""
- get value from redis
- increment requests + 1
- increment tpm + 1
- increment rpm + 1
- update value in-memory + redis
"""
cache_key = "usage:{}".format(precise_minute)
if (
self.internal_cache.redis_cache is not None
): # get straight from redis if possible
current_minute_dict = (
await self.internal_cache.redis_cache.async_get_cache(
key=cache_key,
)
) # {"usage:{current_minute}": {"key": {}, "team": {}, "user": {}}}
else:
current_minute_dict = await self.internal_cache.async_get_cache(
key=cache_key,
)
if current_minute_dict is None:
current_minute_dict = {"key": {}, "user": {}, "team": {}}
_cache_updated = False # check if a cache update is required. prevent unnecessary rewrites.
# ------------
# Update usage - API Key
# ------------
if user_api_key is not None:
_cache_updated = True
## API KEY ##
if user_api_key in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["key"][user_api_key] = new_val
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
# ------------
# Update usage - User
# ------------
if user_api_key_user_id is not None:
_cache_updated = True
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
if user_api_key_user_id in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key_user_id]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["user"][user_api_key_user_id] = new_val
# ------------
# Update usage - Team
# ------------
if user_api_key_team_id is not None:
_cache_updated = True
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
if user_api_key_team_id in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key_team_id]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["team"][user_api_key_team_id] = new_val
if _cache_updated == True:
await self.internal_cache.async_set_cache(
key=cache_key, value=current_minute_dict
)
except Exception as e:
self.print_verbose("{}\n{}".format(e, traceback.format_exc())) # noqa


@@ -397,6 +397,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:
def get_custom_headers(
    *,
user_api_key_dict: UserAPIKeyAuth,
    model_id: Optional[str] = None,
    cache_key: Optional[str] = None,
    api_base: Optional[str] = None,
@@ -410,6 +411,8 @@ def get_custom_headers(
        "x-litellm-model-api-base": api_base,
        "x-litellm-version": version,
        "x-litellm-model-region": model_region,
"x-litellm-key-tpm-limit": str(user_api_key_dict.tpm_limit),
"x-litellm-key-rpm-limit": str(user_api_key_dict.rpm_limit),
    }
    try:
        return {
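Because `user_api_key_dict` is now threaded into `get_custom_headers`, proxy responses carry the key's limits. A hedged sketch of reading them from a client (the proxy URL and key are placeholders):

```python
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder virtual key
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.headers.get("x-litellm-key-tpm-limit"))
print(resp.headers.get("x-litellm-key-rpm-limit"))
```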
@@ -2787,6 +2790,13 @@ class ProxyConfig:
                model.model_info["id"] = _id
                model.model_info["db_model"] = True
if premium_user is True:
# seeing "created_at", "updated_at", "created_by", "updated_by" is a LiteLLM Enterprise Feature
model.model_info["created_at"] = getattr(model, "created_at", None)
model.model_info["updated_at"] = getattr(model, "updated_at", None)
model.model_info["created_by"] = getattr(model, "created_by", None)
model.model_info["updated_by"] = getattr(model, "updated_by", None)
                if model.model_info is not None and isinstance(model.model_info, dict):
                    if "id" not in model.model_info:
                        model.model_info["id"] = model.model_id
@@ -3072,10 +3082,9 @@ class ProxyConfig:
        try:
            if master_key is None or not isinstance(master_key, str):
-                raise Exception(
+                raise ValueError(
                    f"Master key is not initialized or formatted. master_key={master_key}"
                )
verbose_proxy_logger.debug(f"llm_router: {llm_router}")
            new_models = await prisma_client.db.litellm_proxymodeltable.find_many()
            # update llm router
            await self._update_llm_router(
@@ -4059,6 +4068,7 @@ async def chat_completion(
            "stream" in data and data["stream"] == True
        ):  # use generate_responses to stream responses
            custom_headers = get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4078,6 +4088,7 @@ async def chat_completion(
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4298,6 +4309,7 @@ async def completion(
            "stream" in data and data["stream"] == True
        ):  # use generate_responses to stream responses
            custom_headers = get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4316,6 +4328,7 @@ async def completion(
        )
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4565,6 +4578,7 @@ async def embeddings(
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4748,6 +4762,7 @@ async def image_generation(
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -4949,6 +4964,7 @@ async def audio_transcriptions(
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -5132,6 +5148,7 @@ async def moderations(
        fastapi_response.headers.update(
            get_custom_headers(
user_api_key_dict=user_api_key_dict,
                model_id=model_id,
                cache_key=cache_key,
                api_base=api_base,
@@ -6083,7 +6100,7 @@ async def get_global_activity_model(
        sql_query = """
        SELECT
-            model,
+            model_group AS model,
            date_trunc('day', "startTime") AS date,
            COUNT(*) AS api_requests,
            SUM(total_tokens) AS total_tokens


@@ -35,7 +35,6 @@ from litellm import (
)
from litellm.utils import ModelResponseIterator
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.db.base_client import CustomDB
@@ -81,9 +80,6 @@ class ProxyLogging:
        self.call_details["user_api_key_cache"] = user_api_key_cache
        self.internal_usage_cache = DualCache()
        self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler()
self.max_tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(
internal_cache=self.internal_usage_cache
)
        self.max_budget_limiter = _PROXY_MaxBudgetLimiter()
        self.cache_control_check = _PROXY_CacheControlCheck()
        self.alerting: Optional[List] = None
@@ -144,7 +140,6 @@ class ProxyLogging:
        print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
        self.service_logging_obj = ServiceLogging()
        litellm.callbacks.append(self.max_parallel_request_limiter)
litellm.callbacks.append(self.max_tpm_rpm_limiter)
        litellm.callbacks.append(self.max_budget_limiter)
        litellm.callbacks.append(self.cache_control_check)
        litellm.callbacks.append(self.service_logging_obj)


@@ -38,6 +38,7 @@ from litellm.utils import (
import copy
from litellm._logging import verbose_router_logger
import logging
from litellm.types.utils import ModelInfo as ModelMapInfo
from litellm.types.router import (
    Deployment,
    ModelInfo,
@@ -349,17 +350,13 @@ class Router:
    def validate_fallbacks(self, fallback_param: Optional[List]):
        if fallback_param is None:
            return
-        if len(fallback_param) > 0:  # if set
-            ## for dictionary in list, check if only 1 key in dict
-            for _dict in fallback_param:
-                assert isinstance(_dict, dict), "Item={}, not a dictionary".format(
-                    _dict
-                )
-                assert (
-                    len(_dict.keys()) == 1
-                ), "Only 1 key allows in dictionary. You set={} for dict={}".format(
-                    len(_dict.keys()), _dict
-                )
+        for fallback_dict in fallback_param:
+            if not isinstance(fallback_dict, dict):
+                raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
+            if len(fallback_dict) != 1:
+                raise ValueError(
+                    f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
    def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
        if routing_strategy == "least-busy":
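A quick illustration of what the rewritten `validate_fallbacks` accepts and rejects (a sketch; model names and the API key are placeholders):

```python
from litellm import Router

model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"}},
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4", "api_key": "sk-placeholder"}},
]

# Valid: each entry is a dict with exactly one key.
router = Router(model_list=model_list, fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}])

# Invalid: a non-dict entry, or a dict with two keys, now raises ValueError
# instead of failing an assert.
# Router(model_list=model_list, fallbacks=["gpt-4"])
```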
@@ -3065,16 +3062,31 @@ class Router:
            try:
                model_info = litellm.get_model_info(model=litellm_params.model)
            except Exception as e:
-                continue
+                model_info = None
            # get llm provider
            try:
                model, llm_provider, _, _ = litellm.get_llm_provider(
                    model=litellm_params.model,
                    custom_llm_provider=litellm_params.custom_llm_provider,
                )
-            except Exception as e:
+            except litellm.exceptions.BadRequestError as e:
                continue
if model_info is None:
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=llm_provider
)
model_info = ModelMapInfo(
max_tokens=None,
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider=llm_provider,
mode="chat",
supported_openai_params=supported_openai_params,
)
            if model_group_info is None:
                model_group_info = ModelGroupInfo(
                    model_group=model_group, providers=[llm_provider], **model_info  # type: ignore
@@ -3089,18 +3101,26 @@ class Router:
                # supports_function_calling == True
                if llm_provider not in model_group_info.providers:
                    model_group_info.providers.append(llm_provider)
-                if model_info.get("max_input_tokens", None) is not None and (
-                    model_group_info.max_input_tokens is None
-                    or model_info["max_input_tokens"]
-                    > model_group_info.max_input_tokens
+                if (
+                    model_info.get("max_input_tokens", None) is not None
+                    and model_info["max_input_tokens"] is not None
+                    and (
+                        model_group_info.max_input_tokens is None
+                        or model_info["max_input_tokens"]
+                        > model_group_info.max_input_tokens
+                    )
                ):
                    model_group_info.max_input_tokens = model_info[
                        "max_input_tokens"
                    ]
-                if model_info.get("max_output_tokens", None) is not None and (
-                    model_group_info.max_output_tokens is None
-                    or model_info["max_output_tokens"]
-                    > model_group_info.max_output_tokens
+                if (
+                    model_info.get("max_output_tokens", None) is not None
+                    and model_info["max_output_tokens"] is not None
+                    and (
+                        model_group_info.max_output_tokens is None
+                        or model_info["max_output_tokens"]
+                        > model_group_info.max_output_tokens
+                    )
                ):
                    model_group_info.max_output_tokens = model_info[
                        "max_output_tokens"
@@ -3124,19 +3144,26 @@ class Router:
                if (
                    model_info.get("supports_parallel_function_calling", None)
                    is not None
-                    and model_info["supports_parallel_function_calling"] == True  # type: ignore
+                    and model_info["supports_parallel_function_calling"] is True  # type: ignore
                ):
                    model_group_info.supports_parallel_function_calling = True
                if (
                    model_info.get("supports_vision", None) is not None
-                    and model_info["supports_vision"] == True  # type: ignore
+                    and model_info["supports_vision"] is True  # type: ignore
                ):
                    model_group_info.supports_vision = True
                if (
                    model_info.get("supports_function_calling", None) is not None
-                    and model_info["supports_function_calling"] == True  # type: ignore
+                    and model_info["supports_function_calling"] is True  # type: ignore
                ):
                    model_group_info.supports_function_calling = True
if (
model_info.get("supported_openai_params", None) is not None
and model_info["supported_openai_params"] is not None
):
model_group_info.supported_openai_params = model_info[
"supported_openai_params"
]
        return model_group_info
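With the `ModelMapInfo` fallback above, models missing from the pricing map no longer drop out of the group info, and `supported_openai_params` is surfaced. A hedged sketch of inspecting the result, assuming the surrounding method is `Router.get_model_group_info` (model names and key are placeholders):

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "my-group",
            "litellm_params": {"model": "openai/my-custom-model", "api_key": "sk-placeholder"},
        },
    ]
)

info = router.get_model_group_info(model_group="my-group")
if info is not None:
    print(info.providers)                # e.g. ["openai"]
    print(info.supported_openai_params)  # filled via litellm.get_supported_openai_params
```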


@@ -1,162 +1,163 @@
### REPLACED BY 'test_parallel_request_limiter.py' ###
# What is this?
## Unit tests for the max tpm / rpm limiter hook for proxy
# import sys, os, asyncio, time, random
# from datetime import datetime
# import traceback
# from dotenv import load_dotenv
# from typing import Optional
# load_dotenv()
# import os
# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import Router
# from litellm.proxy.utils import ProxyLogging, hash_token
# from litellm.proxy._types import UserAPIKeyAuth
# from litellm.caching import DualCache, RedisCache
# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
# from datetime import datetime
# @pytest.mark.asyncio
# async def test_pre_call_hook_rpm_limits():
#     """
#     Test if error raised on hitting rpm limits
#     """
#     litellm.set_verbose = True
#     _api_key = hash_token("sk-12345")
#     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, tpm_limit=9, rpm_limit=1)
#     local_cache = DualCache()
#     # redis_usage_cache = RedisCache()
#     local_cache.set_cache(
#         key=_api_key, value={"api_key": _api_key, "tpm_limit": 9, "rpm_limit": 1}
#     )
#     tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=DualCache())
#     await tpm_rpm_limiter.async_pre_call_hook(
#         user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
#     )
#     kwargs = {"litellm_params": {"metadata": {"user_api_key": _api_key}}}
#     await tpm_rpm_limiter.async_log_success_event(
#         kwargs=kwargs,
#         response_obj="",
#         start_time="",
#         end_time="",
#     )
#     ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
#     try:
#         await tpm_rpm_limiter.async_pre_call_hook(
#             user_api_key_dict=user_api_key_dict,
#             cache=local_cache,
#             data={},
#             call_type="",
#         )
#         pytest.fail(f"Expected call to fail")
#     except Exception as e:
#         assert e.status_code == 429
# @pytest.mark.asyncio
# async def test_pre_call_hook_team_rpm_limits(
#     _redis_usage_cache: Optional[RedisCache] = None,
# ):
#     """
#     Test if error raised on hitting team rpm limits
#     """
#     litellm.set_verbose = True
#     _api_key = "sk-12345"
#     _team_id = "unique-team-id"
#     _user_api_key_dict = {
#         "api_key": _api_key,
#         "max_parallel_requests": 1,
#         "tpm_limit": 9,
#         "rpm_limit": 10,
#         "team_rpm_limit": 1,
#         "team_id": _team_id,
#     }
#     user_api_key_dict = UserAPIKeyAuth(**_user_api_key_dict)  # type: ignore
#     _api_key = hash_token(_api_key)
#     local_cache = DualCache()
#     local_cache.set_cache(key=_api_key, value=_user_api_key_dict)
#     internal_cache = DualCache(redis_cache=_redis_usage_cache)
#     tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=internal_cache)
#     await tpm_rpm_limiter.async_pre_call_hook(
#         user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
#     )
#     kwargs = {
#         "litellm_params": {
#             "metadata": {"user_api_key": _api_key, "user_api_key_team_id": _team_id}
#         }
#     }
#     await tpm_rpm_limiter.async_log_success_event(
#         kwargs=kwargs,
#         response_obj="",
#         start_time="",
#         end_time="",
#     )
#     print(f"local_cache: {local_cache}")
#     ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
#     try:
#         await tpm_rpm_limiter.async_pre_call_hook(
#             user_api_key_dict=user_api_key_dict,
#             cache=local_cache,
#             data={},
#             call_type="",
#         )
#         pytest.fail(f"Expected call to fail")
#     except Exception as e:
#         assert e.status_code == 429  # type: ignore
# @pytest.mark.asyncio
# async def test_namespace():
#     """
#     - test if default namespace set via `proxyconfig._init_cache`
#     - respected for tpm/rpm caching
#     """
#     from litellm.proxy.proxy_server import ProxyConfig
#     redis_usage_cache: Optional[RedisCache] = None
#     cache_params = {"type": "redis", "namespace": "litellm_default"}
#     ## INIT CACHE ##
#     proxy_config = ProxyConfig()
#     setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)
#     proxy_config._init_cache(cache_params=cache_params)
#     redis_cache: Optional[RedisCache] = getattr(
#         litellm.proxy.proxy_server, "redis_usage_cache"
#     )
#     ## CHECK IF NAMESPACE SET ##
#     assert redis_cache.namespace == "litellm_default"
#     ## CHECK IF TPM/RPM RATE LIMITING WORKS ##
#     await test_pre_call_hook_team_rpm_limits(_redis_usage_cache=redis_cache)
#     current_date = datetime.now().strftime("%Y-%m-%d")
#     current_hour = datetime.now().strftime("%H")
#     current_minute = datetime.now().strftime("%M")
#     precise_minute = f"{current_date}-{current_hour}-{current_minute}"
#     cache_key = "litellm_default:usage:{}".format(precise_minute)
#     value = await redis_cache.async_get_cache(key=cache_key)
#     assert value is not None


@@ -229,17 +229,21 @@ async def test_pre_call_hook_user_tpm_limits():
    """
    Test if error raised on hitting tpm limits
    """
local_cache = DualCache()
    # create user with tpm/rpm limits
user_id = "test-user"
user_obj = {"tpm_limit": 9, "rpm_limit": 10}
local_cache.set_cache(key=user_id, value=user_obj)
_api_key = "sk-12345" _api_key = "sk-12345"
user_api_key_dict = UserAPIKeyAuth( user_api_key_dict = UserAPIKeyAuth(
api_key=_api_key, api_key=_api_key,
user_id="ishaan", user_id=user_id,
user_id_rate_limits={"tpm_limit": 9, "rpm_limit": 10},
    )
    res = dict(user_api_key_dict)
    print("dict user", res)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler() parallel_request_handler = MaxParallelRequestsHandler()
await parallel_request_handler.async_pre_call_hook( await parallel_request_handler.async_pre_call_hook(
@ -248,7 +252,7 @@ async def test_pre_call_hook_user_tpm_limits():
kwargs = { kwargs = {
"litellm_params": { "litellm_params": {
"metadata": {"user_api_key_user_id": "ishaan", "user_api_key": "gm"} "metadata": {"user_api_key_user_id": user_id, "user_api_key": "gm"}
} }
} }
@ -734,7 +738,7 @@ async def test_bad_router_call():
request_count_api_key = f"{_api_key}::{precise_minute}::request_count" request_count_api_key = f"{_api_key}::{precise_minute}::request_count"
assert ( assert (
parallel_request_handler.user_api_key_cache.get_cache( parallel_request_handler.user_api_key_cache.get_cache( # type: ignore
key=request_count_api_key key=request_count_api_key
)["current_requests"] )["current_requests"]
== 1 == 1
@ -751,7 +755,7 @@ async def test_bad_router_call():
except: except:
pass pass
assert ( assert (
parallel_request_handler.user_api_key_cache.get_cache( parallel_request_handler.user_api_key_cache.get_cache( # type: ignore
key=request_count_api_key key=request_count_api_key
)["current_requests"] )["current_requests"]
== 0 == 0
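
The assertions above read a per-key, per-minute counter whose key format is `"{api_key}::{precise_minute}::request_count"`. A toy in-memory version of that bookkeeping is sketched below to show what `current_requests` tracks; it is not `MaxParallelRequestsHandler`, and any field or limit not shown in the test is an assumption.

```python
# Toy sketch of the counter layout the tests assert on; key format copied from the test.
# Not litellm's MaxParallelRequestsHandler; fields beyond the test are assumptions.
from datetime import datetime

counters: dict = {}

def _request_count_key(api_key: str) -> str:
    now = datetime.now()
    precise_minute = f"{now.strftime('%Y-%m-%d')}-{now.strftime('%H')}-{now.strftime('%M')}"
    return f"{api_key}::{precise_minute}::request_count"

def pre_call(api_key: str, rpm_limit: int) -> None:
    entry = counters.setdefault(
        _request_count_key(api_key),
        {"current_requests": 0, "current_tpm": 0, "current_rpm": 0},
    )
    if entry["current_rpm"] >= rpm_limit:
        raise Exception("429: Max RPM reached for this key")
    entry["current_requests"] += 1  # incremented while the request is in flight

def post_call_success(api_key: str) -> None:
    entry = counters[_request_count_key(api_key)]
    entry["current_requests"] -= 1  # request finished; matches the `== 0` assertion above
    entry["current_rpm"] += 1
```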

View file

@ -1,9 +1,15 @@
"""
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
"""
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import uuid
import enum
import httpx import httpx
from pydantic import BaseModel, validator, Field from pydantic import BaseModel, Field
import datetime
from .completion import CompletionRequest from .completion import CompletionRequest
from .embedding import EmbeddingRequest from .embedding import EmbeddingRequest
import uuid, enum
class ModelConfig(BaseModel): class ModelConfig(BaseModel):
@ -76,6 +82,12 @@ class ModelInfo(BaseModel):
db_model: bool = ( db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config. False # used for proxy - to separate models which are stored in the db vs. config.
) )
updated_at: Optional[datetime.datetime] = None
updated_by: Optional[str] = None
created_at: Optional[datetime.datetime] = None
created_by: Optional[str] = None
base_model: Optional[str] = ( base_model: Optional[str] = (
None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
) )
@ -426,3 +438,4 @@ class ModelGroupInfo(BaseModel):
supports_parallel_function_calling: bool = Field(default=False) supports_parallel_function_calling: bool = Field(default=False)
supports_vision: bool = Field(default=False) supports_vision: bool = Field(default=False)
supports_function_calling: bool = Field(default=False) supports_function_calling: bool = Field(default=False)
supported_openai_params: List[str] = Field(default=[])
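
The new `updated_at` / `updated_by` / `created_at` / `created_by` fields turn `ModelInfo` into a small audit record for each deployment. A hedged sketch of attaching them when registering a model is below; the field names come from the diff, while the `Router` call site, the API key, and the user id are placeholders for illustration.

```python
# Hedged sketch: attaching the new audit fields when registering a model.
# Field names are from the ModelInfo diff above; key and user id are placeholders.
import datetime
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."},
            "model_info": {
                "db_model": False,  # stored in config, not the proxy DB
                "created_at": datetime.datetime.utcnow(),
                "created_by": "user-1234",  # hypothetical admin id
                "updated_at": datetime.datetime.utcnow(),
                "updated_by": "user-1234",
            },
        }
    ]
)
```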

View file

@ -15,10 +15,17 @@ class ProviderField(TypedDict):
class ModelInfo(TypedDict): class ModelInfo(TypedDict):
max_tokens: int """
max_input_tokens: int Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
max_output_tokens: int """
max_tokens: Optional[int]
max_input_tokens: Optional[int]
max_output_tokens: Optional[int]
input_cost_per_token: float input_cost_per_token: float
output_cost_per_token: float output_cost_per_token: float
litellm_provider: str litellm_provider: str
mode: str mode: Literal[
"completion", "embedding", "image_generation", "chat", "audio_transcription"
]
supported_openai_params: Optional[List[str]]
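
Since `max_tokens`, `max_input_tokens`, and `max_output_tokens` are now `Optional`, callers reading this `ModelInfo` dict should tolerate `None`. A small guard like the one below works; the helper is illustrative and not part of litellm.

```python
# Illustrative guard for the now-Optional token limits; not part of litellm.
from typing import Optional

def max_input_tokens_or_default(model_info: dict, default: int = 4096) -> int:
    value: Optional[int] = model_info.get("max_input_tokens")
    return value if value is not None else default

print(max_input_tokens_or_default({"max_input_tokens": None}))  # -> 4096
print(max_input_tokens_or_default({"max_input_tokens": 8192}))  # -> 8192
```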

View file

@ -5797,30 +5797,11 @@ def get_optional_params(
model=model, custom_llm_provider=custom_llm_provider model=model, custom_llm_provider=custom_llm_provider
) )
_check_valid_arg(supported_params=supported_params) _check_valid_arg(supported_params=supported_params)
if temperature is not None: optional_params = litellm.DeepInfraConfig().map_openai_params(
if ( non_default_params=non_default_params,
temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1" optional_params=optional_params,
): # this model does no support temperature == 0 model=model,
temperature = 0.0001 # close to 0 )
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if n:
optional_params["n"] = n
if stream:
optional_params["stream"] = stream
if stop:
optional_params["stop"] = stop
if max_tokens:
optional_params["max_tokens"] = max_tokens
if presence_penalty:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias:
optional_params["logit_bias"] = logit_bias
if user:
optional_params["user"] = user
elif custom_llm_provider == "perplexity": elif custom_llm_provider == "perplexity":
supported_params = get_supported_openai_params( supported_params = get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider model=model, custom_llm_provider=custom_llm_provider
@ -6604,19 +6585,7 @@ def get_supported_openai_params(
elif custom_llm_provider == "petals": elif custom_llm_provider == "petals":
return ["max_tokens", "temperature", "top_p", "stream"] return ["max_tokens", "temperature", "top_p", "stream"]
elif custom_llm_provider == "deepinfra": elif custom_llm_provider == "deepinfra":
return [ return litellm.DeepInfraConfig().get_supported_openai_params()
"temperature",
"top_p",
"n",
"stream",
"stop",
"max_tokens",
"presence_penalty",
"frequency_penalty",
"logit_bias",
"user",
"response_format",
]
elif custom_llm_provider == "perplexity": elif custom_llm_provider == "perplexity":
return [ return [
"temperature", "temperature",
@ -7107,6 +7076,7 @@ def get_model_info(model: str) -> ModelInfo:
- output_cost_per_token (float): The cost per token for output. - output_cost_per_token (float): The cost per token for output.
- litellm_provider (str): The provider of the model (e.g., "openai"). - litellm_provider (str): The provider of the model (e.g., "openai").
- mode (str): The mode of the model (e.g., "chat" or "completion"). - mode (str): The mode of the model (e.g., "chat" or "completion").
- supported_openai_params (List[str]): A list of supported OpenAI parameters for the model.
Raises: Raises:
Exception: If the model is not mapped yet. Exception: If the model is not mapped yet.
@ -7118,9 +7088,11 @@ def get_model_info(model: str) -> ModelInfo:
"input_cost_per_token": 0.00003, "input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006, "output_cost_per_token": 0.00006,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat",
"supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
} }
""" """
supported_openai_params: Union[List[str], None] = []
def _get_max_position_embeddings(model_name): def _get_max_position_embeddings(model_name):
# Construct the URL for the config.json file # Construct the URL for the config.json file
@ -7148,9 +7120,18 @@ def get_model_info(model: str) -> ModelInfo:
azure_llms = litellm.azure_llms azure_llms = litellm.azure_llms
if model in azure_llms: if model in azure_llms:
model = azure_llms[model] model = azure_llms[model]
if model in litellm.model_cost: ##########################
return litellm.model_cost[model] # Get custom_llm_provider
model, custom_llm_provider, _, _ = get_llm_provider(model=model) split_model, custom_llm_provider = model, ""
try:
split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
except:
pass
#########################
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
if custom_llm_provider == "huggingface": if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model) max_tokens = _get_max_position_embeddings(model_name=model)
return { return {
@ -7159,15 +7140,26 @@ def get_model_info(model: str) -> ModelInfo:
"output_cost_per_token": 0, "output_cost_per_token": 0,
"litellm_provider": "huggingface", "litellm_provider": "huggingface",
"mode": "chat", "mode": "chat",
"supported_openai_params": supported_openai_params,
} }
else: else:
""" """
Check if model in model cost map Check if:
1. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost
2. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost
""" """
if model in litellm.model_cost: if model in litellm.model_cost:
return litellm.model_cost[model] _model_info = litellm.model_cost[model]
_model_info["supported_openai_params"] = supported_openai_params
return _model_info
if split_model in litellm.model_cost:
_model_info = litellm.model_cost[split_model]
_model_info["supported_openai_params"] = supported_openai_params
return _model_info
else: else:
raise Exception() raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
except: except:
raise Exception( raise Exception(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.38.10" version = "1.38.11"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.38.10" version = "1.38.11"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-e266cb0126026d40.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dYIEEO-62OCgyckEhgBd-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-e266cb0126026d40.js"],""] 3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["dYIEEO-62OCgyckEhgBd-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -86,6 +86,8 @@ import type { UploadProps } from "antd";
import { Upload } from "antd"; import { Upload } from "antd";
import TimeToFirstToken from "./model_metrics/time_to_first_token"; import TimeToFirstToken from "./model_metrics/time_to_first_token";
import DynamicFields from "./model_add/dynamic_form"; import DynamicFields from "./model_add/dynamic_form";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
interface ModelDashboardProps { interface ModelDashboardProps {
accessToken: string | null; accessToken: string | null;
token: string | null; token: string | null;
@ -269,6 +271,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI"); const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
const [healthCheckResponse, setHealthCheckResponse] = useState<string>(""); const [healthCheckResponse, setHealthCheckResponse] = useState<string>("");
const [editModalVisible, setEditModalVisible] = useState<boolean>(false); const [editModalVisible, setEditModalVisible] = useState<boolean>(false);
const [infoModalVisible, setInfoModalVisible] = useState<boolean>(false);
const [selectedModel, setSelectedModel] = useState<any>(null); const [selectedModel, setSelectedModel] = useState<any>(null);
const [availableModelGroups, setAvailableModelGroups] = useState< const [availableModelGroups, setAvailableModelGroups] = useState<
Array<string> Array<string>
@ -297,6 +301,15 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
useState<RetryPolicyObject | null>(null); useState<RetryPolicyObject | null>(null);
const [defaultRetry, setDefaultRetry] = useState<number>(0); const [defaultRetry, setDefaultRetry] = useState<number>(0);
function formatCreatedAt(createdAt: string | null) {
if (createdAt) {
const date = new Date(createdAt);
const options: Intl.DateTimeFormatOptions = { month: 'long', day: 'numeric', year: 'numeric' };
return date.toLocaleDateString('en-US', options);
}
return null;
}
const EditModelModal: React.FC<EditModelModalProps> = ({ const EditModelModal: React.FC<EditModelModalProps> = ({
visible, visible,
onCancel, onCancel,
@ -423,11 +436,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
setEditModalVisible(true); setEditModalVisible(true);
}; };
const handleInfoClick = (model: any) => {
setSelectedModel(model);
setInfoModalVisible(true);
};
const handleEditCancel = () => { const handleEditCancel = () => {
setEditModalVisible(false); setEditModalVisible(false);
setSelectedModel(null); setSelectedModel(null);
}; };
const handleInfoCancel = () => {
setInfoModalVisible(false);
setSelectedModel(null);
};
const handleEditSubmit = async (formValues: Record<string, any>) => { const handleEditSubmit = async (formValues: Record<string, any>) => {
// Call API to update team with teamId and values // Call API to update team with teamId and values
@ -1039,7 +1062,6 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
</div> </div>
<Card> <Card>
<Table <Table
className="mt-5"
style={{ maxWidth: "1500px", width: "100%" }} style={{ maxWidth: "1500px", width: "100%" }}
> >
<TableHead> <TableHead>
@ -1049,6 +1071,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "150px", maxWidth: "150px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Public Model Name Public Model Name
@ -1058,6 +1081,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "100px", maxWidth: "100px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Provider Provider
@ -1068,25 +1092,18 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "150px", maxWidth: "150px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
API Base API Base
</TableHeaderCell> </TableHeaderCell>
)} )}
<TableHeaderCell
style={{
maxWidth: "200px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
Extra litellm Params
</TableHeaderCell>
<TableHeaderCell <TableHeaderCell
style={{ style={{
maxWidth: "85px", maxWidth: "85px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Input Price{" "} Input Price{" "}
@ -1099,6 +1116,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "85px", maxWidth: "85px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Output Price{" "} Output Price{" "}
@ -1106,24 +1124,45 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
/1M Tokens ($) /1M Tokens ($)
</p> </p>
</TableHeaderCell> </TableHeaderCell>
<TableHeaderCell <TableHeaderCell
style={{ style={{
maxWidth: "120px", maxWidth: "100px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Max Tokens {
premiumUser ? "Created At" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> Created At</a>
}
</TableHeaderCell>
<TableHeaderCell
style={{
maxWidth: "100px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
{
premiumUser ? "Created By" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> Created By</a>
}
</TableHeaderCell> </TableHeaderCell>
<TableHeaderCell <TableHeaderCell
style={{ style={{
maxWidth: "50px", maxWidth: "50px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
fontSize: "11px"
}} }}
> >
Status Status
</TableHeaderCell> </TableHeaderCell>
<TableHeaderCell>
</TableHeaderCell>
</TableRow> </TableRow>
</TableHead> </TableHead>
<TableBody> <TableBody>
@ -1137,15 +1176,17 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
selectedModelGroup === "" selectedModelGroup === ""
) )
.map((model: any, index: number) => ( .map((model: any, index: number) => (
<TableRow key={index}> <TableRow key={index} style={{ maxHeight: "1px", minHeight: "1px" }}>
<TableCell <TableCell
style={{ style={{
maxWidth: "150px", maxWidth: "100px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
<Text>{model.model_name}</Text> <p style={{ fontSize: "10px" }}>
{model.model_name || "-"}
</p>
</TableCell> </TableCell>
<TableCell <TableCell
style={{ style={{
@ -1154,41 +1195,34 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
{model.provider} <p style={{ fontSize: "10px" }}>
{model.provider || "-"}
</p>
</TableCell> </TableCell>
{userRole === "Admin" && ( {userRole === "Admin" && (
<TableCell <TableCell
style={{
maxWidth: "150px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
{model.api_base}
</TableCell>
)}
<TableCell
style={{ style={{
maxWidth: "200px", maxWidth: "150px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
<Accordion> <Tooltip title={model && model.api_base}>
<AccordionHeader> <pre
<Text>Litellm params</Text> style={{
</AccordionHeader> maxWidth: "150px",
<AccordionBody> whiteSpace: "normal",
<pre> wordBreak: "break-word",
{JSON.stringify( fontSize: "10px",
model.cleanedLitellmParams, }}
null, title={model && model.api_base ? model.api_base : ""}
2 >
)} {model && model.api_base ? model.api_base.slice(0, 20) : "-"}
</pre> </pre>
</AccordionBody> </Tooltip>
</Accordion>
</TableCell> </TableCell>
)}
<TableCell <TableCell
style={{ style={{
maxWidth: "80px", maxWidth: "80px",
@ -1196,6 +1230,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
<pre style={{ fontSize: "10px" }}>
{model.input_cost {model.input_cost
? model.input_cost ? model.input_cost
: model.litellm_params.input_cost_per_token : model.litellm_params.input_cost_per_token
@ -1205,6 +1240,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
) * 1000000 ) * 1000000
).toFixed(2) ).toFixed(2)
: null} : null}
</pre>
</TableCell> </TableCell>
<TableCell <TableCell
style={{ style={{
@ -1213,6 +1249,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
<pre style={{ fontSize: "10px" }}>
{model.output_cost {model.output_cost
? model.output_cost ? model.output_cost
: model.litellm_params.output_cost_per_token : model.litellm_params.output_cost_per_token
@ -1222,17 +1259,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
) * 1000000 ) * 1000000
).toFixed(2) ).toFixed(2)
: null} : null}
</pre>
</TableCell> </TableCell>
<TableCell <TableCell>
style={{ <p style={{ fontSize: "10px" }}>
maxWidth: "120px", {
whiteSpace: "normal", premiumUser ? formatCreatedAt(model.model_info.created_at) || "-" : "-"
wordBreak: "break-word", }
}} </p>
>
<p style={{ fontSize: "10px" }}> </TableCell>
Max Tokens: {model.max_tokens} <br></br> <TableCell>
Max Input Tokens: {model.max_input_tokens} <p style={{ fontSize: "10px" }}>
{
premiumUser ? model.model_info.created_by || "-" : "-"
}
</p> </p>
</TableCell> </TableCell>
<TableCell <TableCell
@ -1248,7 +1289,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
size="xs" size="xs"
className="text-white" className="text-white"
> >
<p style={{ fontSize: "10px" }}>DB Model</p> <p style={{ fontSize: "8px" }}>DB Model</p>
</Badge> </Badge>
) : ( ) : (
<Badge <Badge
@ -1256,26 +1297,42 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
size="xs" size="xs"
className="text-black" className="text-black"
> >
<p style={{ fontSize: "10px" }}>Config Model</p> <p style={{ fontSize: "8px" }}>Config Model</p>
</Badge> </Badge>
)} )}
</TableCell> </TableCell>
<TableCell <TableCell
style={{ style={{
maxWidth: "100px", maxWidth: "150px",
whiteSpace: "normal", whiteSpace: "normal",
wordBreak: "break-word", wordBreak: "break-word",
}} }}
> >
<Grid numItems={3}>
<Col>
<Icon
icon={InformationCircleIcon}
size="sm"
onClick={() => handleInfoClick(model)}
/>
</Col>
<Col>
<Icon <Icon
icon={PencilAltIcon} icon={PencilAltIcon}
size="sm" size="sm"
onClick={() => handleEditClick(model)} onClick={() => handleEditClick(model)}
/> />
</Col>
<Col>
<DeleteModelButton <DeleteModelButton
modelID={model.model_info.id} modelID={model.model_info.id}
accessToken={accessToken} accessToken={accessToken}
/> />
</Col>
</Grid>
</TableCell> </TableCell>
</TableRow> </TableRow>
))} ))}
@ -1289,6 +1346,20 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
model={selectedModel} model={selectedModel}
onSubmit={handleEditSubmit} onSubmit={handleEditSubmit}
/> />
<Modal
title={selectedModel && selectedModel.model_name}
visible={infoModalVisible}
width={800}
footer={null}
onCancel={handleInfoCancel}
>
<Title>Model Info</Title>
<SyntaxHighlighter language="json" >
{selectedModel && JSON.stringify(selectedModel, null, 2)}
</SyntaxHighlighter>
</Modal>
</TabPanel> </TabPanel>
<TabPanel className="h-full"> <TabPanel className="h-full">
<Title2 level={2}>Add new model</Title2> <Title2 level={2}>Add new model</Title2>

View file

@ -1,20 +1,26 @@
import React, { useEffect, useState } from 'react'; import React, { useEffect, useState } from "react";
import { modelHubCall } from "./networking"; import { modelHubCall } from "./networking";
import { Card, Text, Title, Grid, Button, Badge, Tab, import {
TabGroup, Card,
TabList, Text,
TabPanel, Title,
TabPanels, } from "@tremor/react"; Grid,
Button,
Badge,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels,
} from "@tremor/react";
import { RightOutlined, CopyOutlined } from '@ant-design/icons'; import { RightOutlined, CopyOutlined } from "@ant-design/icons";
import { Modal, Tooltip } from 'antd'; import { Modal, Tooltip } from "antd";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter"; import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
interface ModelHubProps { interface ModelHubProps {
userID: string | null; userID: string | null;
userRole: string | null; userRole: string | null;
@ -22,7 +28,6 @@ interface ModelHubProps {
accessToken: string | null; accessToken: string | null;
keys: any; // Replace with the appropriate type for 'keys' prop keys: any; // Replace with the appropriate type for 'keys' prop
premiumUser: boolean; premiumUser: boolean;
} }
interface ModelInfo { interface ModelInfo {
@ -32,15 +37,13 @@ interface ModelInfo {
supports_vision: boolean; supports_vision: boolean;
max_input_tokens?: number; max_input_tokens?: number;
max_output_tokens?: number; max_output_tokens?: number;
supported_openai_params?: string[];
// Add other properties if needed
}
// Add other properties if needed
}
const ModelHub: React.FC<ModelHubProps> = ({ const ModelHub: React.FC<ModelHubProps> = ({
userID, userID,
userRole, userRole,
@ -52,140 +55,80 @@ const ModelHub: React.FC<ModelHubProps> = ({
keys, keys,
premiumUser, premiumUser,
}) => { }) => {
const [modelHubData, setModelHubData] = useState<ModelInfo[] | null>(null);
const [modelHubData, setModelHubData] = useState<ModelInfo[] | null>(null);
const [isModalVisible, setIsModalVisible] = useState(false); const [isModalVisible, setIsModalVisible] = useState(false);
const [selectedModel, setSelectedModel] = useState<null | ModelInfo>(null); const [selectedModel, setSelectedModel] = useState<null | ModelInfo>(null);
useEffect(() => { useEffect(() => {
if (!accessToken || !token || !userRole || !userID) { if (!accessToken || !token || !userRole || !userID) {
return; return;
} }
const fetchData = async () => { const fetchData = async () => {
try { try {
const _modelHubData = await modelHubCall(accessToken, userID, userRole); const _modelHubData = await modelHubCall(accessToken, userID, userRole);
console.log("ModelHubData:", _modelHubData); console.log("ModelHubData:", _modelHubData);
setModelHubData(_modelHubData.data); setModelHubData(_modelHubData.data);
} catch (error) { } catch (error) {
console.error("There was an error fetching the model data", error); console.error("There was an error fetching the model data", error);
} }
}; };
fetchData(); fetchData();
}, [accessToken, token, userRole, userID]); }, [accessToken, token, userRole, userID]);
const showModal = (model: ModelInfo) => { const showModal = (model: ModelInfo) => {
setSelectedModel(model); setSelectedModel(model);
setIsModalVisible(true); setIsModalVisible(true);
}; };
const handleOk = () => { const handleOk = () => {
setIsModalVisible(false); setIsModalVisible(false);
setSelectedModel(null); setSelectedModel(null);
}; };
const handleCancel = () => { const handleCancel = () => {
setIsModalVisible(false); setIsModalVisible(false);
setSelectedModel(null); setSelectedModel(null);
}; };
const copyToClipboard = (text: string) => { const copyToClipboard = (text: string) => {
navigator.clipboard.writeText(text); navigator.clipboard.writeText(text);
}; };
return ( return (
<div> <div>
<div className="w-full m-2 mt-2 p-8">
<div className="relative w-full"></div>
<div className="w-full m-2 mt-2 p-8"> <div className="flex items-center">
<Title className="ml-8 text-center ">Model Hub</Title>
<div className="relative w-full"> <Button className="ml-4">
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Make Public
</a>
</div> </Button>
<div className='flex items-center'>
<Title className='ml-8 text-center '>Model Hub</Title>
<Button className='ml-4'>
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Share
</a>
</Button>
</div> </div>
<div className="grid grid-cols-2 gap-6 sm:grid-cols-3 lg:grid-cols-4"> <div className="grid grid-cols-2 gap-6 sm:grid-cols-3 lg:grid-cols-4">
{modelHubData &&
modelHubData.map((model: ModelInfo) => (
{modelHubData && modelHubData.map((model: ModelInfo) => ( <Card key={model.model_group} className="mt-5 mx-8">
<pre className="flex justify-between">
<Card <Title>{model.model_group}</Title>
<Tooltip title={model.model_group}>
key={model.model_group} <CopyOutlined
onClick={() => copyToClipboard(model.model_group)}
className="mt-5 mx-8" style={{ cursor: "pointer", marginRight: "10px" }}
/>
> </Tooltip>
</pre>
<pre className='flex justify-between'>
<Title>{model.model_group}</Title>
<Tooltip title={model.model_group}>
<CopyOutlined onClick={() => copyToClipboard(model.model_group)} style={{ cursor: 'pointer', marginRight: '10px' }} />
</Tooltip>
</pre>
<div className='my-5'> <div className='my-5'>
<Text>Mode: {model.mode}</Text> <Text>Mode: {model.mode}</Text>
@ -193,52 +136,37 @@ const ModelHub: React.FC<ModelHubProps> = ({
<Text>Supports Vision: {model?.supports_vision == true ? "Yes" : "No"}</Text> <Text>Supports Vision: {model?.supports_vision == true ? "Yes" : "No"}</Text>
<Text>Max Input Tokens: {model?.max_input_tokens ? model?.max_input_tokens : "N/A"}</Text> <Text>Max Input Tokens: {model?.max_input_tokens ? model?.max_input_tokens : "N/A"}</Text>
<Text>Max Output Tokens: {model?.max_output_tokens ? model?.max_output_tokens : "N/A"}</Text> <Text>Max Output Tokens: {model?.max_output_tokens ? model?.max_output_tokens : "N/A"}</Text>
</div> </div>
<div style={{ marginTop: "auto", textAlign: "right" }}>
<div style={{ marginTop: 'auto', textAlign: 'right' }}> <a
href="#"
onClick={() => showModal(model)}
style={{ color: "#1890ff", fontSize: "smaller" }}
<a href="#" onClick={() => showModal(model)} style={{ color: '#1890ff', fontSize: 'smaller' }}> >
View more <RightOutlined />
View more <RightOutlined /> </a>
</div>
</a> </Card>
))}
</div>
</Card>
))}
</div> </div>
</div> </div>
<Modal <Modal
title={selectedModel && selectedModel.model_group ? selectedModel.model_group : "Unknown Model"}
title="Model Usage"
width={800} width={800}
visible={isModalVisible} visible={isModalVisible}
footer={null} footer={null}
onOk={handleOk} onOk={handleOk}
onCancel={handleCancel} onCancel={handleCancel}
> >
{selectedModel && ( {selectedModel && (
<div> <div>
<p className='mb-4'><strong>Model Information & Usage</strong></p>
<p><strong>Model Name:</strong> {selectedModel.model_group}</p>
<TabGroup> <TabGroup>
<TabList> <TabList>
<Tab>OpenAI Python SDK</Tab> <Tab>OpenAI Python SDK</Tab>
<Tab>Supported OpenAI Params</Tab>
<Tab>LlamaIndex</Tab> <Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab> <Tab>Langchain Py</Tab>
</TabList> </TabList>
@ -267,8 +195,13 @@ print(response)
</SyntaxHighlighter> </SyntaxHighlighter>
</TabPanel> </TabPanel>
<TabPanel> <TabPanel>
<SyntaxHighlighter language="python"> <SyntaxHighlighter language="python">
{` {`${selectedModel.supported_openai_params?.map((param) => `${param}\n`).join('')}`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
import os, dotenv import os, dotenv
from llama_index.llms import AzureOpenAI from llama_index.llms import AzureOpenAI
@ -300,11 +233,11 @@ response = query_engine.query("What did the author do growing up?")
print(response) print(response)
`} `}
</SyntaxHighlighter> </SyntaxHighlighter>
</TabPanel> </TabPanel>
<TabPanel> <TabPanel>
<SyntaxHighlighter language="python"> <SyntaxHighlighter language="python">
{` {`
from langchain.chat_models import ChatOpenAI from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ( from langchain.prompts.chat import (
ChatPromptTemplate, ChatPromptTemplate,
@ -332,27 +265,19 @@ response = chat(messages)
print(response) print(response)
`} `}
</SyntaxHighlighter> </SyntaxHighlighter>
</TabPanel> </TabPanel>
</TabPanels> </TabPanels>
</TabGroup> </TabGroup>
{/* <p><strong>Additional Params:</strong> {JSON.stringify(selectedModel.litellm_params)}</p> */} {/* <p><strong>Additional Params:</strong> {JSON.stringify(selectedModel.litellm_params)}</p> */}
{/* Add other model details here */} {/* Add other model details here */}
</div> </div>
)} )}
</Modal> </Modal>
</div> </div>
); );
}; };
export default ModelHub;
export default ModelHub;
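
The new "Supported OpenAI Params" tab renders the `supported_openai_params` list that `ModelGroupInfo` now carries. A hedged sketch of fetching the same data is below; it assumes the hub is backed by the proxy's `/model_group/info` endpoint, and the URL and key are placeholders.

```python
# Hedged sketch of the data behind the "Supported OpenAI Params" tab.
# Assumes the hub is fed by the proxy's /model_group/info endpoint; URL and key are placeholders.
import requests

resp = requests.get(
    "http://localhost:4000/model_group/info",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
for group in resp.json().get("data", []):
    print(group.get("model_group"), group.get("supported_openai_params", []))
```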

View file

@ -1,6 +1,10 @@
"use client"; "use client";
import React, { useState, useEffect } from "react"; import React, { useState, useEffect } from "react";
import { userInfoCall, modelAvailableCall, getTotalSpendCall } from "./networking"; import {
userInfoCall,
modelAvailableCall,
getTotalSpendCall,
} from "./networking";
import { Grid, Col, Card, Text, Title } from "@tremor/react"; import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button"; import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table"; import ViewKeyTable from "./view_key_table";
@ -19,7 +23,6 @@ type UserSpendData = {
max_budget?: number | null; max_budget?: number | null;
}; };
interface UserDashboardProps { interface UserDashboardProps {
userID: string | null; userID: string | null;
userRole: string | null; userRole: string | null;
@ -35,8 +38,8 @@ interface UserDashboardProps {
type TeamInterface = { type TeamInterface = {
models: any[]; models: any[];
team_id: null; team_id: null;
team_alias: String team_alias: String;
} };
const UserDashboard: React.FC<UserDashboardProps> = ({ const UserDashboard: React.FC<UserDashboardProps> = ({
userID, userID,
@ -63,10 +66,10 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
const [teamSpend, setTeamSpend] = useState<number | null>(null); const [teamSpend, setTeamSpend] = useState<number | null>(null);
const [userModels, setUserModels] = useState<string[]>([]); const [userModels, setUserModels] = useState<string[]>([]);
const defaultTeam: TeamInterface = { const defaultTeam: TeamInterface = {
"models": [], models: [],
"team_alias": "Default Team", team_alias: "Default Team",
"team_id": null team_id: null,
} };
const [selectedTeam, setSelectedTeam] = useState<any | null>( const [selectedTeam, setSelectedTeam] = useState<any | null>(
teams ? teams[0] : defaultTeam teams ? teams[0] : defaultTeam
); );
@ -137,7 +140,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
} else { } else {
const fetchData = async () => { const fetchData = async () => {
try { try {
const response = await userInfoCall(accessToken, userID, userRole, false, null, null); const response = await userInfoCall(
accessToken,
userID,
userRole,
false,
null,
null
);
console.log( console.log(
`received teams in user dashboard: ${Object.keys( `received teams in user dashboard: ${Object.keys(
response response
@ -152,12 +162,12 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
} }
setKeys(response["keys"]); // Assuming this is the correct path to your data setKeys(response["keys"]); // Assuming this is the correct path to your data
setTeams(response["teams"]); setTeams(response["teams"]);
const teamsArray = [...response['teams']]; const teamsArray = [...response["teams"]];
if (teamsArray.length > 0) { if (teamsArray.length > 0) {
console.log(`response['teams']: ${teamsArray}`); console.log(`response['teams']: ${teamsArray}`);
setSelectedTeam(teamsArray[0]); setSelectedTeam(teamsArray[0]);
} else { } else {
setSelectedTeam(defaultTeam); setSelectedTeam(defaultTeam);
} }
sessionStorage.setItem( sessionStorage.setItem(
"userData" + userID, "userData" + userID,
@ -194,22 +204,30 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
fetchData(); fetchData();
} }
} }
}, [userID, token, accessToken, keys, userRole]); }, [userID, token, accessToken, keys, userRole]);
useEffect(() => { useEffect(() => {
// This code will run every time selectedTeam changes // This code will run every time selectedTeam changes
if (keys !== null && selectedTeam !== null && selectedTeam !== undefined) { if (
keys !== null &&
selectedTeam !== null &&
selectedTeam !== undefined &&
selectedTeam.team_id !== null
) {
let sum = 0; let sum = 0;
for (const key of keys) { for (const key of keys) {
if (selectedTeam.hasOwnProperty('team_id') && key.team_id !== null && key.team_id === selectedTeam.team_id) { if (
selectedTeam.hasOwnProperty("team_id") &&
key.team_id !== null &&
key.team_id === selectedTeam.team_id
) {
sum += key.spend; sum += key.spend;
} }
} }
setTeamSpend(sum); setTeamSpend(sum);
} else if (keys !== null) { } else if (keys !== null) {
// sum the keys which don't have team-id set (default team) // sum the keys which don't have team-id set (default team)
let sum = 0 let sum = 0;
for (const key of keys) { for (const key of keys) {
sum += key.spend; sum += key.spend;
} }
@ -245,9 +263,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
} }
console.log("inside user dashboard, selected team", selectedTeam); console.log("inside user dashboard, selected team", selectedTeam);
console.log(`teamSpend: ${teamSpend}`)
return ( return (
<div className="w-full mx-4"> <div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2"> <Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
<Col numColSpan={1}> <Col numColSpan={1}>
<ViewUserTeam <ViewUserTeam
@ -261,8 +278,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
userRole={userRole} userRole={userRole}
accessToken={accessToken} accessToken={accessToken}
userSpend={teamSpend} userSpend={teamSpend}
selectedTeam = {selectedTeam ? selectedTeam : null} selectedTeam={selectedTeam ? selectedTeam : null}
/> />
<ViewKeyTable <ViewKeyTable
@ -283,11 +299,15 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys} data={keys}
setData={setKeys} setData={setKeys}
/> />
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/> <DashboardTeam
teams={teams}
setSelectedTeam={setSelectedTeam}
userRole={userRole}
/>
</Col> </Col>
</Grid> </Grid>
</div> </div>
); );
}; };
export default UserDashboard; export default UserDashboard;
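
For reference, the `teamSpend` aggregation in the hunk above boils down to: sum spend across keys tagged with the selected team, and fall back to summing every key when no real team (the "Default Team") is selected. A small Python mirror of that logic, for illustration only:

```python
# Python mirror of the teamSpend calculation above, for illustration only.
from typing import Optional

def team_spend(keys: list, selected_team_id: Optional[str]) -> float:
    if selected_team_id is not None:
        # only keys that belong to the selected team count
        return sum(k.get("spend", 0) for k in keys if k.get("team_id") == selected_team_id)
    # "Default Team": no real team selected, fall back to all keys
    return sum(k.get("spend", 0) for k in keys)

print(team_spend([{"team_id": "t1", "spend": 2.5}, {"team_id": None, "spend": 1.0}], "t1"))  # -> 2.5
```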