forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_custom_pricing_ui_fix
Commit dec482031b
32 changed files with 638 additions and 859 deletions
@@ -11,7 +11,7 @@ Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are Supported on Clarifai.
 To obtain your Clarifai Personal access token follow this [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/). Optionally the PAT can also be passed in `completion` function.

 ```python
-os.environ["CALRIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"  # CLARIFAI_PAT
+os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"  # CLARIFAI_PAT
 ```

 ## Usage
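With the corrected variable name, a minimal sanity check looks like the sketch below; the Clarifai model route shown is illustrative only and should be replaced with a real model path from the Clarifai provider docs.

```python
import os
import litellm

# LiteLLM reads the Clarifai PAT from this environment variable (spelling fixed in this commit).
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT"

# Illustrative model route - substitute an actual Clarifai model path.
response = litellm.completion(
    model="clarifai/openai.chat-completion.GPT-4",
    messages=[{"role": "user", "content": "Say hello"}],
)
print(response.choices[0].message.content)
```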
@@ -21,6 +21,7 @@ Features:
 - ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
 - ✅ Tracking Spend for Custom Tags
 - ✅ Custom Branding + Routes on Swagger Docs
+- ✅ Audit Logs for `Created At, Created By` when Models Added

 ## Content Moderation
@@ -9,12 +9,3 @@ Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

 [](https://wa.link/huol9n) [](https://discord.gg/wuPM9dRgDw)

-## Stable Version
-
-If you're running into problems with installation / Usage
-Use the stable version of litellm
-
-```shell
-pip install litellm==0.1.819
-```
@@ -766,7 +766,12 @@ from .llms.bedrock import (
     AmazonMistralConfig,
     AmazonBedrockGlobalConfig,
 )
-from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig, MistralConfig
+from .llms.openai import (
+    OpenAIConfig,
+    OpenAITextCompletionConfig,
+    MistralConfig,
+    DeepInfraConfig,
+)
 from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
 from .llms.watsonx import IBMWatsonXAIConfig
 from .main import *  # type: ignore
@@ -157,6 +157,101 @@ class MistralConfig:
                 )
             if param == "seed":
                 optional_params["extra_body"] = {"random_seed": value}
+            if param == "response_format":
+                optional_params["response_format"] = value
+        return optional_params
+
+
+class DeepInfraConfig:
+    """
+    Reference: https://deepinfra.com/docs/advanced/openai_api
+
+    The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters:
+    """
+
+    frequency_penalty: Optional[int] = None
+    function_call: Optional[Union[str, dict]] = None
+    functions: Optional[list] = None
+    logit_bias: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    n: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    response_format: Optional[dict] = None
+    tools: Optional[list] = None
+    tool_choice: Optional[Union[str, dict]] = None
+
+    def __init__(
+        self,
+        frequency_penalty: Optional[int] = None,
+        function_call: Optional[Union[str, dict]] = None,
+        functions: Optional[list] = None,
+        logit_bias: Optional[dict] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
+        tools: Optional[list] = None,
+        tool_choice: Optional[Union[str, dict]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(self):
+        return [
+            "frequency_penalty",
+            "function_call",
+            "functions",
+            "logit_bias",
+            "max_tokens",
+            "n",
+            "presence_penalty",
+            "stop",
+            "temperature",
+            "top_p",
+            "response_format",
+            "tools",
+            "tool_choice",
+        ]
+
+    def map_openai_params(
+        self, non_default_params: dict, optional_params: dict, model: str
+    ):
+        supported_openai_params = self.get_supported_openai_params()
+        for param, value in non_default_params.items():
+            if (
+                param == "temperature"
+                and value == 0
+                and model == "mistralai/Mistral-7B-Instruct-v0.1"
+            ):  # this model does no support temperature == 0
+                value = 0.0001  # close to 0
+            if param in supported_openai_params:
+                optional_params[param] = value
         return optional_params
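To see how the new config is meant to be used, here is a small sketch (not part of the diff) that maps OpenAI-style params onto a DeepInfra request dict; it only assumes the class shown above and the `DeepInfraConfig` export added to `litellm/__init__.py` earlier in this commit.

```python
from litellm import DeepInfraConfig

config = DeepInfraConfig()
optional_params: dict = {}

# Only params listed in get_supported_openai_params() are copied over;
# temperature=0 is nudged to 0.0001 for mistralai/Mistral-7B-Instruct-v0.1.
config.map_openai_params(
    non_default_params={"temperature": 0, "max_tokens": 256, "logprobs": True},
    optional_params=optional_params,
    model="mistralai/Mistral-7B-Instruct-v0.1",
)
print(optional_params)  # {'temperature': 0.0001, 'max_tokens': 256} - 'logprobs' is dropped
```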
@@ -197,6 +292,7 @@ class OpenAIConfig:
     stop: Optional[Union[str, list]] = None
     temperature: Optional[int] = None
     top_p: Optional[int] = None
+    response_format: Optional[dict] = None

     def __init__(
         self,
@@ -210,6 +306,7 @@ class OpenAIConfig:
         stop: Optional[Union[str, list]] = None,
         temperature: Optional[int] = None,
         top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
     ) -> None:
         locals_ = locals().copy()
         for key, value in locals_.items():
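For context, `response_format` is the OpenAI-style parameter these configs now carry through. A minimal usage sketch follows; the model name is an assumption, not part of this commit, and the call requires a provider/model that actually supports JSON mode.

```python
import litellm

# response_format is passed through like any other supported OpenAI param.
response = litellm.completion(
    model="gpt-3.5-turbo",  # assumption: a model with JSON-mode support
    messages=[{"role": "user", "content": "Return a JSON object with a 'greeting' key."}],
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)
```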
File diff suppressed because one or more lines are too long

(Generated Next.js build output for the LiteLLM Dashboard UI — the prerendered index.html (@@ -1 +1 @@) and its RSC payload (@@ -1,7 +1,7 @@). The only substantive changes in both files are the page chunk reference, static/chunks/app/page-e266cb0126026d40.js → static/chunks/app/page-76d278f96a0e9768.js, and the build ID, dYIEEO-62OCgyckEhgBd- → D_ZUmMtLMPSa4aQQUJtKt; the surrounding minified markup and payload are unchanged.)
@@ -984,10 +984,6 @@ class LiteLLM_VerificationToken(LiteLLMBase):

     org_id: Optional[str] = None  # org id for a given key

-    # hidden params used for parallel request limiting, not required to create a token
-    user_id_rate_limits: Optional[dict] = None
-    team_id_rate_limits: Optional[dict] = None
-
     class Config:
         protected_namespaces = ()
@@ -164,8 +164,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         # check if REQUEST ALLOWED for user_id
         user_id = user_api_key_dict.user_id
         if user_id is not None:
-            _user_id_rate_limits = user_api_key_dict.user_id_rate_limits
+            _user_id_rate_limits = await self.user_api_key_cache.async_get_cache(
+                key=user_id
+            )
             # get user tpm/rpm limits
             if _user_id_rate_limits is not None and isinstance(
                 _user_id_rate_limits, dict
@@ -196,13 +197,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         ## get team tpm/rpm limits
         team_id = user_api_key_dict.team_id
         if team_id is not None:
-            team_tpm_limit = getattr(user_api_key_dict, "team_tpm_limit", sys.maxsize)
-            if team_tpm_limit is None:
-                team_tpm_limit = sys.maxsize
-            team_rpm_limit = getattr(user_api_key_dict, "team_rpm_limit", sys.maxsize)
-            if team_rpm_limit is None:
-                team_rpm_limit = sys.maxsize
+            team_tpm_limit = user_api_key_dict.team_tpm_limit
+            team_rpm_limit = user_api_key_dict.team_rpm_limit

             if team_tpm_limit is None:
                 team_tpm_limit = sys.maxsize
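The change above replaces the removed `user_id_rate_limits` field with a cache lookup keyed on the user id. A sketch of that lookup pattern, assuming only the `DualCache.async_get_cache` interface already used in this diff:

```python
import sys
from typing import Tuple

from litellm.caching import DualCache


async def get_user_limits(user_api_key_cache: DualCache, user_id: str) -> Tuple[int, int]:
    """Fetch per-user tpm/rpm limits from the shared cache, defaulting to 'unlimited'."""
    user_obj = await user_api_key_cache.async_get_cache(key=user_id)
    tpm_limit, rpm_limit = sys.maxsize, sys.maxsize
    if isinstance(user_obj, dict):
        tpm_limit = user_obj.get("tpm_limit") or sys.maxsize
        rpm_limit = user_obj.get("rpm_limit") or sys.maxsize
    return tpm_limit, rpm_limit
```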
@@ -1,379 +0,0 @@
(File deleted: litellm/proxy/hooks/tpm_rpm_limiter.py — the `_PROXY_MaxTPMRPMLimiter` CustomLogger hook that checked TPM/RPM limits for a key/user/team on the proxy, working with Redis if given. It kept a per-minute usage dict of the form {"usage:{precise_minute}": {"key": {...}, "user": {...}, "team": {...}}} in a DualCache, with `_check_limits_set()` to detect whether any tpm/rpm limits were configured, `check_key_in_limits()` to raise HTTP 429 "Max tpm/rpm limit reached." when a limit was exceeded, `async_pre_call_hook()` to enforce key/user/team limits before a call, and `async_log_success_event()` to increment current_tpm/current_rpm after a successful call. Its functionality is folded into the parallel request limiter updated above, and its tests are disabled at the end of this diff.)
@@ -397,6 +397,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:

 def get_custom_headers(
     *,
+    user_api_key_dict: UserAPIKeyAuth,
     model_id: Optional[str] = None,
     cache_key: Optional[str] = None,
     api_base: Optional[str] = None,
@@ -410,6 +411,8 @@ def get_custom_headers(
         "x-litellm-model-api-base": api_base,
         "x-litellm-version": version,
         "x-litellm-model-region": model_region,
+        "x-litellm-key-tpm-limit": str(user_api_key_dict.tpm_limit),
+        "x-litellm-key-rpm-limit": str(user_api_key_dict.rpm_limit),
     }
     try:
         return {
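With these two headers added, a client can read its key's limits off any proxy response. A sketch of that, assuming a proxy running locally on the default port and a virtual key — both are assumptions about the deployment, not part of this commit:

```python
import requests

# Assumptions: proxy at http://localhost:4000 and a virtual key "sk-1234"; adjust for your setup.
resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
# The proxy now echoes the key's configured limits on every response.
print(resp.headers.get("x-litellm-key-tpm-limit"), resp.headers.get("x-litellm-key-rpm-limit"))
```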
@@ -2787,6 +2790,13 @@ class ProxyConfig:
                 model.model_info["id"] = _id
                 model.model_info["db_model"] = True

+                if premium_user is True:
+                    # seeing "created_at", "updated_at", "created_by", "updated_by" is a LiteLLM Enterprise Feature
+                    model.model_info["created_at"] = getattr(model, "created_at", None)
+                    model.model_info["updated_at"] = getattr(model, "updated_at", None)
+                    model.model_info["created_by"] = getattr(model, "created_by", None)
+                    model.model_info["updated_by"] = getattr(model, "updated_by", None)
+
             if model.model_info is not None and isinstance(model.model_info, dict):
                 if "id" not in model.model_info:
                     model.model_info["id"] = model.model_id
@@ -3072,10 +3082,9 @@ class ProxyConfig:

         try:
             if master_key is None or not isinstance(master_key, str):
-                raise Exception(
+                raise ValueError(
                     f"Master key is not initialized or formatted. master_key={master_key}"
                 )
-            verbose_proxy_logger.debug(f"llm_router: {llm_router}")
             new_models = await prisma_client.db.litellm_proxymodeltable.find_many()
             # update llm router
             await self._update_llm_router(
@@ -4059,6 +4068,7 @@ async def chat_completion(
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             custom_headers = get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4078,6 +4088,7 @@ async def chat_completion(

         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4298,6 +4309,7 @@ async def completion(
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
             custom_headers = get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4316,6 +4328,7 @@ async def completion(
         )
         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4565,6 +4578,7 @@ async def embeddings(

         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4748,6 +4762,7 @@ async def image_generation(

         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -4949,6 +4964,7 @@ async def audio_transcriptions(

         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -5132,6 +5148,7 @@ async def moderations(

         fastapi_response.headers.update(
             get_custom_headers(
+                user_api_key_dict=user_api_key_dict,
                 model_id=model_id,
                 cache_key=cache_key,
                 api_base=api_base,
@@ -6083,7 +6100,7 @@ async def get_global_activity_model(

     sql_query = """
         SELECT
-        model,
+        model_group AS model,
         date_trunc('day', "startTime") AS date,
         COUNT(*) AS api_requests,
         SUM(total_tokens) AS total_tokens
@@ -35,7 +35,6 @@ from litellm import (
 )
 from litellm.utils import ModelResponseIterator
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
-from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy.db.base_client import CustomDB
@@ -81,9 +80,6 @@ class ProxyLogging:
         self.call_details["user_api_key_cache"] = user_api_key_cache
         self.internal_usage_cache = DualCache()
         self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler()
-        self.max_tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(
-            internal_cache=self.internal_usage_cache
-        )
         self.max_budget_limiter = _PROXY_MaxBudgetLimiter()
         self.cache_control_check = _PROXY_CacheControlCheck()
         self.alerting: Optional[List] = None
@@ -144,7 +140,6 @@ class ProxyLogging:
         print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
         self.service_logging_obj = ServiceLogging()
         litellm.callbacks.append(self.max_parallel_request_limiter)
-        litellm.callbacks.append(self.max_tpm_rpm_limiter)
         litellm.callbacks.append(self.max_budget_limiter)
         litellm.callbacks.append(self.cache_control_check)
         litellm.callbacks.append(self.service_logging_obj)
@@ -38,6 +38,7 @@ from litellm.utils import (
 import copy
 from litellm._logging import verbose_router_logger
 import logging
+from litellm.types.utils import ModelInfo as ModelMapInfo
 from litellm.types.router import (
     Deployment,
     ModelInfo,
@@ -349,17 +350,13 @@ class Router:
     def validate_fallbacks(self, fallback_param: Optional[List]):
         if fallback_param is None:
             return
-        if len(fallback_param) > 0:  # if set
-            ## for dictionary in list, check if only 1 key in dict
-            for _dict in fallback_param:
-                assert isinstance(_dict, dict), "Item={}, not a dictionary".format(
-                    _dict
-                )
-                assert (
-                    len(_dict.keys()) == 1
-                ), "Only 1 key allows in dictionary. You set={} for dict={}".format(
-                    len(_dict.keys()), _dict
-                )
+
+        for fallback_dict in fallback_param:
+            if not isinstance(fallback_dict, dict):
+                raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
+            if len(fallback_dict) != 1:
+                raise ValueError(
+                    f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")

     def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
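For reference, a fallbacks list that passes the stricter validation above: each item must be a dict with exactly one key, mapping a model group to its fallback groups. Model names here are illustrative.

```python
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
    ],
    # Valid: one key per dict.
    fallbacks=[{"gpt-4": ["gpt-3.5-turbo"]}],
)

# A malformed entry such as fallbacks=[{"gpt-4": [...], "extra": [...]}] now raises
# ValueError instead of failing an assert.
```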
@@ -3065,16 +3062,31 @@ class Router:
             try:
                 model_info = litellm.get_model_info(model=litellm_params.model)
             except Exception as e:
-                continue
+                model_info = None
             # get llm provider
             try:
                 model, llm_provider, _, _ = litellm.get_llm_provider(
                     model=litellm_params.model,
                     custom_llm_provider=litellm_params.custom_llm_provider,
                 )
-            except Exception as e:
+            except litellm.exceptions.BadRequestError as e:
                 continue

+            if model_info is None:
+                supported_openai_params = litellm.get_supported_openai_params(
+                    model=model, custom_llm_provider=llm_provider
+                )
+                model_info = ModelMapInfo(
+                    max_tokens=None,
+                    max_input_tokens=None,
+                    max_output_tokens=None,
+                    input_cost_per_token=0,
+                    output_cost_per_token=0,
+                    litellm_provider=llm_provider,
+                    mode="chat",
+                    supported_openai_params=supported_openai_params,
+                )
+
             if model_group_info is None:
                 model_group_info = ModelGroupInfo(
                     model_group=model_group, providers=[llm_provider], **model_info  # type: ignore
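The fallback above leans on `litellm.get_supported_openai_params` when no static model-map entry exists. A quick sketch of that helper on its own; the model and provider values are illustrative:

```python
import litellm

# Returns the OpenAI-style params LiteLLM knows how to translate for this provider/model.
params = litellm.get_supported_openai_params(
    model="mistral-medium", custom_llm_provider="mistral"  # illustrative values
)
print(params)  # e.g. ['temperature', 'top_p', 'max_tokens', 'stream', ...]
```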
@@ -3089,18 +3101,26 @@ class Router:
                 # supports_function_calling == True
                 if llm_provider not in model_group_info.providers:
                     model_group_info.providers.append(llm_provider)
-                if model_info.get("max_input_tokens", None) is not None and (
-                    model_group_info.max_input_tokens is None
-                    or model_info["max_input_tokens"]
-                    > model_group_info.max_input_tokens
+                if (
+                    model_info.get("max_input_tokens", None) is not None
+                    and model_info["max_input_tokens"] is not None
+                    and (
+                        model_group_info.max_input_tokens is None
+                        or model_info["max_input_tokens"]
+                        > model_group_info.max_input_tokens
+                    )
                 ):
                     model_group_info.max_input_tokens = model_info[
                         "max_input_tokens"
                     ]
-                if model_info.get("max_output_tokens", None) is not None and (
-                    model_group_info.max_output_tokens is None
-                    or model_info["max_output_tokens"]
-                    > model_group_info.max_output_tokens
+                if (
+                    model_info.get("max_output_tokens", None) is not None
+                    and model_info["max_output_tokens"] is not None
+                    and (
+                        model_group_info.max_output_tokens is None
+                        or model_info["max_output_tokens"]
+                        > model_group_info.max_output_tokens
+                    )
                 ):
                     model_group_info.max_output_tokens = model_info[
                         "max_output_tokens"
@@ -3124,19 +3144,26 @@ class Router:
                 if (
                     model_info.get("supports_parallel_function_calling", None)
                     is not None
-                    and model_info["supports_parallel_function_calling"] == True  # type: ignore
+                    and model_info["supports_parallel_function_calling"] is True  # type: ignore
                 ):
                     model_group_info.supports_parallel_function_calling = True
                 if (
                     model_info.get("supports_vision", None) is not None
-                    and model_info["supports_vision"] == True  # type: ignore
+                    and model_info["supports_vision"] is True  # type: ignore
                 ):
                     model_group_info.supports_vision = True
                 if (
                     model_info.get("supports_function_calling", None) is not None
-                    and model_info["supports_function_calling"] == True  # type: ignore
+                    and model_info["supports_function_calling"] is True  # type: ignore
                 ):
                     model_group_info.supports_function_calling = True
+                if (
+                    model_info.get("supported_openai_params", None) is not None
+                    and model_info["supported_openai_params"] is not None
+                ):
+                    model_group_info.supported_openai_params = model_info[
+                        "supported_openai_params"
+                    ]

         return model_group_info
@@ -1,162 +1,163 @@
+### REPLACED BY 'test_parallel_request_limiter.py' ###
# What is this?
## Unit tests for the max tpm / rpm limiter hook for proxy

-import sys, os, asyncio, time, random
+# import sys, os, asyncio, time, random
-from datetime import datetime
+# from datetime import datetime
-import traceback
+# import traceback
-from dotenv import load_dotenv
+# from dotenv import load_dotenv
-from typing import Optional
+# from typing import Optional

-load_dotenv()
+# load_dotenv()
-import os
+# import os

-sys.path.insert(
+# sys.path.insert(
-0, os.path.abspath("../..")
+# 0, os.path.abspath("../..")
-) # Adds the parent directory to the system path
+# ) # Adds the parent directory to the system path
-import pytest
+# import pytest
-import litellm
+# import litellm
-from litellm import Router
+# from litellm import Router
-from litellm.proxy.utils import ProxyLogging, hash_token
+# from litellm.proxy.utils import ProxyLogging, hash_token
-from litellm.proxy._types import UserAPIKeyAuth
+# from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache, RedisCache
+# from litellm.caching import DualCache, RedisCache
-from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
+# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
-from datetime import datetime
+# from datetime import datetime


-@pytest.mark.asyncio
+# @pytest.mark.asyncio
-async def test_pre_call_hook_rpm_limits():
+# async def test_pre_call_hook_rpm_limits():
-"""
+# """
-Test if error raised on hitting rpm limits
+# Test if error raised on hitting rpm limits
-"""
+# """
-litellm.set_verbose = True
+# litellm.set_verbose = True
-_api_key = hash_token("sk-12345")
+# _api_key = hash_token("sk-12345")
-user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, tpm_limit=9, rpm_limit=1)
+# user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, tpm_limit=9, rpm_limit=1)
-local_cache = DualCache()
+# local_cache = DualCache()
-# redis_usage_cache = RedisCache()
+# # redis_usage_cache = RedisCache()

-local_cache.set_cache(
+# local_cache.set_cache(
-key=_api_key, value={"api_key": _api_key, "tpm_limit": 9, "rpm_limit": 1}
+# key=_api_key, value={"api_key": _api_key, "tpm_limit": 9, "rpm_limit": 1}
-)
+# )

-tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=DualCache())
+# tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=DualCache())

-await tpm_rpm_limiter.async_pre_call_hook(
+# await tpm_rpm_limiter.async_pre_call_hook(
-user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
+# user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
-)
+# )

-kwargs = {"litellm_params": {"metadata": {"user_api_key": _api_key}}}
+# kwargs = {"litellm_params": {"metadata": {"user_api_key": _api_key}}}

-await tpm_rpm_limiter.async_log_success_event(
+# await tpm_rpm_limiter.async_log_success_event(
-kwargs=kwargs,
+# kwargs=kwargs,
-response_obj="",
+# response_obj="",
-start_time="",
+# start_time="",
-end_time="",
+# end_time="",
-)
+# )

-## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
+# ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}

-try:
+# try:
-await tpm_rpm_limiter.async_pre_call_hook(
+# await tpm_rpm_limiter.async_pre_call_hook(
-user_api_key_dict=user_api_key_dict,
+# user_api_key_dict=user_api_key_dict,
-cache=local_cache,
+# cache=local_cache,
-data={},
+# data={},
-call_type="",
+# call_type="",
-)
+# )

-pytest.fail(f"Expected call to fail")
+# pytest.fail(f"Expected call to fail")
-except Exception as e:
+# except Exception as e:
-assert e.status_code == 429
+# assert e.status_code == 429


-@pytest.mark.asyncio
+# @pytest.mark.asyncio
-async def test_pre_call_hook_team_rpm_limits(
+# async def test_pre_call_hook_team_rpm_limits(
-_redis_usage_cache: Optional[RedisCache] = None,
+# _redis_usage_cache: Optional[RedisCache] = None,
-):
+# ):
-"""
+# """
-Test if error raised on hitting team rpm limits
+# Test if error raised on hitting team rpm limits
-"""
+# """
-litellm.set_verbose = True
+# litellm.set_verbose = True
-_api_key = "sk-12345"
+# _api_key = "sk-12345"
-_team_id = "unique-team-id"
+# _team_id = "unique-team-id"
-_user_api_key_dict = {
+# _user_api_key_dict = {
-"api_key": _api_key,
+# "api_key": _api_key,
-"max_parallel_requests": 1,
+# "max_parallel_requests": 1,
-"tpm_limit": 9,
+# "tpm_limit": 9,
-"rpm_limit": 10,
+# "rpm_limit": 10,
-"team_rpm_limit": 1,
+# "team_rpm_limit": 1,
-"team_id": _team_id,
+# "team_id": _team_id,
-}
+# }
-user_api_key_dict = UserAPIKeyAuth(**_user_api_key_dict) # type: ignore
+# user_api_key_dict = UserAPIKeyAuth(**_user_api_key_dict) # type: ignore
-_api_key = hash_token(_api_key)
+# _api_key = hash_token(_api_key)
-local_cache = DualCache()
+# local_cache = DualCache()
-local_cache.set_cache(key=_api_key, value=_user_api_key_dict)
+# local_cache.set_cache(key=_api_key, value=_user_api_key_dict)
-internal_cache = DualCache(redis_cache=_redis_usage_cache)
+# internal_cache = DualCache(redis_cache=_redis_usage_cache)
-tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=internal_cache)
+# tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=internal_cache)
-await tpm_rpm_limiter.async_pre_call_hook(
+# await tpm_rpm_limiter.async_pre_call_hook(
-user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
+# user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
-)
+# )

-kwargs = {
+# kwargs = {
-"litellm_params": {
+# "litellm_params": {
-"metadata": {"user_api_key": _api_key, "user_api_key_team_id": _team_id}
+# "metadata": {"user_api_key": _api_key, "user_api_key_team_id": _team_id}
-}
+# }
-}
+# }

-await tpm_rpm_limiter.async_log_success_event(
+# await tpm_rpm_limiter.async_log_success_event(
-kwargs=kwargs,
+# kwargs=kwargs,
-response_obj="",
+# response_obj="",
-start_time="",
+# start_time="",
-end_time="",
+# end_time="",
-)
+# )

-print(f"local_cache: {local_cache}")
+# print(f"local_cache: {local_cache}")

-## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
+# ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}

-try:
+# try:
-await tpm_rpm_limiter.async_pre_call_hook(
+# await tpm_rpm_limiter.async_pre_call_hook(
-user_api_key_dict=user_api_key_dict,
+# user_api_key_dict=user_api_key_dict,
-cache=local_cache,
+# cache=local_cache,
-data={},
+# data={},
-call_type="",
+# call_type="",
-)
+# )

-pytest.fail(f"Expected call to fail")
+# pytest.fail(f"Expected call to fail")
-except Exception as e:
+# except Exception as e:
-assert e.status_code == 429 # type: ignore
+# assert e.status_code == 429 # type: ignore


-@pytest.mark.asyncio
+# @pytest.mark.asyncio
-async def test_namespace():
+# async def test_namespace():
-"""
+# """
-- test if default namespace set via `proxyconfig._init_cache`
+# - test if default namespace set via `proxyconfig._init_cache`
-- respected for tpm/rpm caching
+# - respected for tpm/rpm caching
-"""
+# """
-from litellm.proxy.proxy_server import ProxyConfig
+# from litellm.proxy.proxy_server import ProxyConfig

-redis_usage_cache: Optional[RedisCache] = None
+# redis_usage_cache: Optional[RedisCache] = None
-cache_params = {"type": "redis", "namespace": "litellm_default"}
+# cache_params = {"type": "redis", "namespace": "litellm_default"}

-## INIT CACHE ##
+# ## INIT CACHE ##
-proxy_config = ProxyConfig()
+# proxy_config = ProxyConfig()
-setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)
+# setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)

-proxy_config._init_cache(cache_params=cache_params)
+# proxy_config._init_cache(cache_params=cache_params)

-redis_cache: Optional[RedisCache] = getattr(
+# redis_cache: Optional[RedisCache] = getattr(
-litellm.proxy.proxy_server, "redis_usage_cache"
+# litellm.proxy.proxy_server, "redis_usage_cache"
-)
+# )

-## CHECK IF NAMESPACE SET ##
+# ## CHECK IF NAMESPACE SET ##
-assert redis_cache.namespace == "litellm_default"
+# assert redis_cache.namespace == "litellm_default"

-## CHECK IF TPM/RPM RATE LIMITING WORKS ##
+# ## CHECK IF TPM/RPM RATE LIMITING WORKS ##
-await test_pre_call_hook_team_rpm_limits(_redis_usage_cache=redis_cache)
+# await test_pre_call_hook_team_rpm_limits(_redis_usage_cache=redis_cache)
-current_date = datetime.now().strftime("%Y-%m-%d")
+# current_date = datetime.now().strftime("%Y-%m-%d")
-current_hour = datetime.now().strftime("%H")
+# current_hour = datetime.now().strftime("%H")
-current_minute = datetime.now().strftime("%M")
+# current_minute = datetime.now().strftime("%M")
-precise_minute = f"{current_date}-{current_hour}-{current_minute}"
+# precise_minute = f"{current_date}-{current_hour}-{current_minute}"

-cache_key = "litellm_default:usage:{}".format(precise_minute)
+# cache_key = "litellm_default:usage:{}".format(precise_minute)
-value = await redis_cache.async_get_cache(key=cache_key)
+# value = await redis_cache.async_get_cache(key=cache_key)
-assert value is not None
+# assert value is not None
@@ -229,17 +229,21 @@ async def test_pre_call_hook_user_tpm_limits():
    """
    Test if error raised on hitting tpm limits
    """
+   local_cache = DualCache()
    # create user with tpm/rpm limits
+   user_id = "test-user"
+   user_obj = {"tpm_limit": 9, "rpm_limit": 10}
+
+   local_cache.set_cache(key=user_id, value=user_obj)
+
    _api_key = "sk-12345"
    user_api_key_dict = UserAPIKeyAuth(
        api_key=_api_key,
-       user_id="ishaan",
+       user_id=user_id,
-       user_id_rate_limits={"tpm_limit": 9, "rpm_limit": 10},
    )
    res = dict(user_api_key_dict)
    print("dict user", res)
-   local_cache = DualCache()
    parallel_request_handler = MaxParallelRequestsHandler()

    await parallel_request_handler.async_pre_call_hook(

@@ -248,7 +252,7 @@ async def test_pre_call_hook_user_tpm_limits():

    kwargs = {
        "litellm_params": {
-           "metadata": {"user_api_key_user_id": "ishaan", "user_api_key": "gm"}
+           "metadata": {"user_api_key_user_id": user_id, "user_api_key": "gm"}
        }
    }

@@ -734,7 +738,7 @@ async def test_bad_router_call():
    request_count_api_key = f"{_api_key}::{precise_minute}::request_count"

    assert (
-       parallel_request_handler.user_api_key_cache.get_cache(
+       parallel_request_handler.user_api_key_cache.get_cache(  # type: ignore
            key=request_count_api_key
        )["current_requests"]
        == 1

@@ -751,7 +755,7 @@ async def test_bad_router_call():
    except:
        pass
    assert (
-       parallel_request_handler.user_api_key_cache.get_cache(
+       parallel_request_handler.user_api_key_cache.get_cache(  # type: ignore
            key=request_count_api_key
        )["current_requests"]
        == 0
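The assertions above read the limiter's per-minute counters out of the user API key cache. A minimal sketch of how that cache key is composed, using only the `{api_key}::{precise_minute}::request_count` format and the strftime pieces that appear in this diff (the `"gm"` api key is just the placeholder the test metadata uses):

```python
from datetime import datetime

# Per-minute window, built the same way the tests above build it.
now = datetime.now()
precise_minute = f"{now.strftime('%Y-%m-%d')}-{now.strftime('%H')}-{now.strftime('%M')}"

# Key the parallel-request limiter assertions look up.
_api_key = "gm"
request_count_api_key = f"{_api_key}::{precise_minute}::request_count"
print(request_count_api_key)  # e.g. gm::2024-05-27-18-03::request_count
```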
@@ -1,9 +1,15 @@
+"""
+litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
+"""
+
 from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
+import uuid
+import enum
 import httpx
-from pydantic import BaseModel, validator, Field
+from pydantic import BaseModel, Field
+import datetime
 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
-import uuid, enum


 class ModelConfig(BaseModel):

@@ -76,6 +82,12 @@ class ModelInfo(BaseModel):
    db_model: bool = (
        False # used for proxy - to separate models which are stored in the db vs. config.
    )
+   updated_at: Optional[datetime.datetime] = None
+   updated_by: Optional[str] = None
+
+   created_at: Optional[datetime.datetime] = None
+   created_by: Optional[str] = None
+
    base_model: Optional[str] = (
        None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
    )

@@ -426,3 +438,4 @@ class ModelGroupInfo(BaseModel):
    supports_parallel_function_calling: bool = Field(default=False)
    supports_vision: bool = Field(default=False)
    supports_function_calling: bool = Field(default=False)
+   supported_openai_params: List[str] = Field(default=[])
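A rough sketch of the new `supported_openai_params` field on `ModelGroupInfo`. This assumes the class is importable from `litellm.types.router` and that `model_group` and `providers` remain its only required fields:

```python
from litellm.types.router import ModelGroupInfo

# Hypothetical values, for illustration only.
group = ModelGroupInfo(
    model_group="gpt-3.5-turbo",
    providers=["openai"],
    supported_openai_params=["temperature", "max_tokens", "stream"],
)
print(group.supports_function_calling)  # False by default
print(group.supported_openai_params)    # ['temperature', 'max_tokens', 'stream']
```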
@@ -15,10 +15,17 @@ class ProviderField(TypedDict):


 class ModelInfo(TypedDict):
-   max_tokens: int
-   max_input_tokens: int
-   max_output_tokens: int
+   """
+   Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
+   """
+
+   max_tokens: Optional[int]
+   max_input_tokens: Optional[int]
+   max_output_tokens: Optional[int]
    input_cost_per_token: float
    output_cost_per_token: float
    litellm_provider: str
-   mode: str
+   mode: Literal[
+       "completion", "embedding", "image_generation", "chat", "audio_transcription"
+   ]
+   supported_openai_params: Optional[List[str]]
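Because the token limits in `ModelInfo` are now `Optional`, callers have to guard for `None`. A small sketch, assuming `litellm.get_model_info` returns this shape for a mapped model (the model name below is only an example):

```python
from typing import Optional

import litellm

info = litellm.get_model_info("groq/llama3-8b-8192")
max_input: Optional[int] = info.get("max_input_tokens")
if max_input is None:
    print("no published input-token limit for this model")
else:
    print(f"max input tokens: {max_input}")
```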
@@ -5797,30 +5797,11 @@ def get_optional_params(
            model=model, custom_llm_provider=custom_llm_provider
        )
        _check_valid_arg(supported_params=supported_params)
-       if temperature is not None:
-           if (
-               temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1"
-           ): # this model does no support temperature == 0
-               temperature = 0.0001 # close to 0
-           optional_params["temperature"] = temperature
-       if top_p:
-           optional_params["top_p"] = top_p
-       if n:
-           optional_params["n"] = n
-       if stream:
-           optional_params["stream"] = stream
-       if stop:
-           optional_params["stop"] = stop
-       if max_tokens:
-           optional_params["max_tokens"] = max_tokens
-       if presence_penalty:
-           optional_params["presence_penalty"] = presence_penalty
-       if frequency_penalty:
-           optional_params["frequency_penalty"] = frequency_penalty
-       if logit_bias:
-           optional_params["logit_bias"] = logit_bias
-       if user:
-           optional_params["user"] = user
+       optional_params = litellm.DeepInfraConfig().map_openai_params(
+           non_default_params=non_default_params,
+           optional_params=optional_params,
+           model=model,
+       )
    elif custom_llm_provider == "perplexity":
        supported_params = get_supported_openai_params(
            model=model, custom_llm_provider=custom_llm_provider
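The inline DeepInfra handling above is replaced by the provider config object. A sketch of that pattern, assuming `DeepInfraConfig` keeps the two methods used in this diff, `get_supported_openai_params()` and `map_openai_params(non_default_params, optional_params, model)`:

```python
import litellm

config = litellm.DeepInfraConfig()
print(config.get_supported_openai_params())

# Map OpenAI-style kwargs into DeepInfra-compatible params. The removed inline
# code nudged temperature == 0 for this Mistral model; presumably the config
# method applies an equivalent adjustment.
mapped = config.map_openai_params(
    non_default_params={"temperature": 0, "max_tokens": 256},
    optional_params={},
    model="mistralai/Mistral-7B-Instruct-v0.1",
)
print(mapped)
```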
@@ -6604,19 +6585,7 @@
    elif custom_llm_provider == "petals":
        return ["max_tokens", "temperature", "top_p", "stream"]
    elif custom_llm_provider == "deepinfra":
-       return [
-           "temperature",
-           "top_p",
-           "n",
-           "stream",
-           "stop",
-           "max_tokens",
-           "presence_penalty",
-           "frequency_penalty",
-           "logit_bias",
-           "user",
-           "response_format",
-       ]
+       return litellm.DeepInfraConfig().get_supported_openai_params()
    elif custom_llm_provider == "perplexity":
        return [
            "temperature",
@@ -7107,6 +7076,7 @@ def get_model_info(model: str) -> ModelInfo:
        - output_cost_per_token (float): The cost per token for output.
        - litellm_provider (str): The provider of the model (e.g., "openai").
        - mode (str): The mode of the model (e.g., "chat" or "completion").
+       - supported_openai_params (List[str]): A list of supported OpenAI parameters for the model.

    Raises:
        Exception: If the model is not mapped yet.

@@ -7118,9 +7088,11 @@ def get_model_info(model: str) -> ModelInfo:
            "input_cost_per_token": 0.00003,
            "output_cost_per_token": 0.00006,
            "litellm_provider": "openai",
-           "mode": "chat"
+           "mode": "chat",
+           "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
        }
    """
+   supported_openai_params: Union[List[str], None] = []

    def _get_max_position_embeddings(model_name):
        # Construct the URL for the config.json file

@@ -7148,9 +7120,18 @@ def get_model_info(model: str) -> ModelInfo:
        azure_llms = litellm.azure_llms
        if model in azure_llms:
            model = azure_llms[model]
-       if model in litellm.model_cost:
-           return litellm.model_cost[model]
-       model, custom_llm_provider, _, _ = get_llm_provider(model=model)
+       ##########################
+       # Get custom_llm_provider
+       split_model, custom_llm_provider = model, ""
+       try:
+           split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
+       except:
+           pass
+       #########################
+
+       supported_openai_params = litellm.get_supported_openai_params(
+           model=model, custom_llm_provider=custom_llm_provider
+       )
        if custom_llm_provider == "huggingface":
            max_tokens = _get_max_position_embeddings(model_name=model)
            return {

@@ -7159,15 +7140,26 @@ def get_model_info(model: str) -> ModelInfo:
                "output_cost_per_token": 0,
                "litellm_provider": "huggingface",
                "mode": "chat",
+               "supported_openai_params": supported_openai_params,
            }
        else:
            """
-           Check if model in model cost map
+           Check if:
+           1. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost
+           2. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost
            """
            if model in litellm.model_cost:
-               return litellm.model_cost[model]
+               _model_info = litellm.model_cost[model]
+               _model_info["supported_openai_params"] = supported_openai_params
+               return _model_info
+           if split_model in litellm.model_cost:
+               _model_info = litellm.model_cost[split_model]
+               _model_info["supported_openai_params"] = supported_openai_params
+               return _model_info
            else:
-               raise Exception()
+               raise ValueError(
+                   "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+               )
    except:
        raise Exception(
            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
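A short usage sketch of the lookup order documented above: the fully prefixed name is checked first, then the provider-stripped name, and unmapped models raise. The mapped names come from the diff's own docstring; the last call uses a deliberately unmapped placeholder:

```python
import litellm

info = litellm.get_model_info("groq/llama3-8b-8192")
print(info["litellm_provider"], info["mode"])
print(info["supported_openai_params"])

try:
    litellm.get_model_info("not-a-real-model")  # hypothetical unmapped name
except Exception as err:
    print(err)  # "This model isn't mapped yet. Add it here - ..."
```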
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.38.10"
+version = "1.38.11"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.38.10"
+version = "1.38.11"
version_files = [
    "pyproject.toml:^version"
]
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-e266cb0126026d40.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dYIEEO-62OCgyckEhgBd-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-e266cb0126026d40.js"],""]
+3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dYIEEO-62OCgyckEhgBd-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -86,6 +86,8 @@ import type { UploadProps } from "antd";
import { Upload } from "antd";
import TimeToFirstToken from "./model_metrics/time_to_first_token";
import DynamicFields from "./model_add/dynamic_form";
+import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";

interface ModelDashboardProps {
  accessToken: string | null;
  token: string | null;

@@ -269,6 +271,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
  const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
  const [healthCheckResponse, setHealthCheckResponse] = useState<string>("");
  const [editModalVisible, setEditModalVisible] = useState<boolean>(false);
+ const [infoModalVisible, setInfoModalVisible] = useState<boolean>(false);
+
  const [selectedModel, setSelectedModel] = useState<any>(null);
  const [availableModelGroups, setAvailableModelGroups] = useState<
    Array<string>

@@ -297,6 +301,15 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
    useState<RetryPolicyObject | null>(null);
  const [defaultRetry, setDefaultRetry] = useState<number>(0);

+ function formatCreatedAt(createdAt: string | null) {
+   if (createdAt) {
+     const date = new Date(createdAt);
+     const options = { month: 'long', day: 'numeric', year: 'numeric' };
+     return date.toLocaleDateString('en-US');
+   }
+   return null;
+ }
+
  const EditModelModal: React.FC<EditModelModalProps> = ({
    visible,
    onCancel,

@@ -423,11 +436,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
    setEditModalVisible(true);
  };

+ const handleInfoClick = (model: any) => {
+   setSelectedModel(model);
+   setInfoModalVisible(true);
+ };
+
  const handleEditCancel = () => {
    setEditModalVisible(false);
    setSelectedModel(null);
  };

+ const handleInfoCancel = () => {
+   setInfoModalVisible(false);
+   setSelectedModel(null);
+ };
+
  const handleEditSubmit = async (formValues: Record<string, any>) => {
    // Call API to update team with teamId and values

@@ -1039,7 +1062,6 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
  </div>
  <Card>
    <Table
-     className="mt-5"
      style={{ maxWidth: "1500px", width: "100%" }}
    >
      <TableHead>

@@ -1049,6 +1071,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            maxWidth: "150px",
            whiteSpace: "normal",
            wordBreak: "break-word",
+           fontSize: "11px"
          }}
        >
          Public Model Name

@@ -1058,6 +1081,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            maxWidth: "100px",
            whiteSpace: "normal",
            wordBreak: "break-word",
+           fontSize: "11px"
          }}
        >
          Provider

@@ -1068,25 +1092,18 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            maxWidth: "150px",
            whiteSpace: "normal",
            wordBreak: "break-word",
+           fontSize: "11px"
          }}
        >
          API Base
        </TableHeaderCell>
      )}
-     <TableHeaderCell
-       style={{
-         maxWidth: "200px",
-         whiteSpace: "normal",
-         wordBreak: "break-word",
-       }}
-     >
-       Extra litellm Params
-     </TableHeaderCell>
      <TableHeaderCell
        style={{
          maxWidth: "85px",
          whiteSpace: "normal",
          wordBreak: "break-word",
+         fontSize: "11px"
        }}
      >
        Input Price{" "}

@@ -1099,6 +1116,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
          maxWidth: "85px",
          whiteSpace: "normal",
          wordBreak: "break-word",
+         fontSize: "11px"
        }}
      >
        Output Price{" "}

@@ -1106,24 +1124,45 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
          /1M Tokens ($)
        </p>
      </TableHeaderCell>

      <TableHeaderCell
        style={{
-         maxWidth: "120px",
+         maxWidth: "100px",
          whiteSpace: "normal",
          wordBreak: "break-word",
+         fontSize: "11px"
        }}
      >
-       Max Tokens
+       {
+         premiumUser ? "Created At" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> ✨ Created At</a>
+       }
+
+     </TableHeaderCell>
+     <TableHeaderCell
+       style={{
+         maxWidth: "100px",
+         whiteSpace: "normal",
+         wordBreak: "break-word",
+         fontSize: "11px"
+       }}
+     >
+       {
+         premiumUser ? "Created By" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> ✨ Created By</a>
+       }
      </TableHeaderCell>
      <TableHeaderCell
        style={{
          maxWidth: "50px",
          whiteSpace: "normal",
          wordBreak: "break-word",
+         fontSize: "11px"
        }}
      >
        Status
      </TableHeaderCell>
+     <TableHeaderCell>
+
+     </TableHeaderCell>
    </TableRow>
  </TableHead>
  <TableBody>
@@ -1137,15 +1176,17 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
        selectedModelGroup === ""
    )
    .map((model: any, index: number) => (
-     <TableRow key={index}>
+     <TableRow key={index} style={{ maxHeight: "1px", minHeight: "1px" }}>
        <TableCell
          style={{
-           maxWidth: "150px",
+           maxWidth: "100px",
            whiteSpace: "normal",
            wordBreak: "break-word",
          }}
        >
-         <Text>{model.model_name}</Text>
+         <p style={{ fontSize: "10px" }}>
+           {model.model_name || "-"}
+         </p>
        </TableCell>
        <TableCell
          style={{

@@ -1154,41 +1195,34 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            wordBreak: "break-word",
          }}
        >
-         {model.provider}
+         <p style={{ fontSize: "10px" }}>
+           {model.provider || "-"}
+         </p>
        </TableCell>
        {userRole === "Admin" && (
          <TableCell
-           style={{
-             maxWidth: "150px",
-             whiteSpace: "normal",
-             wordBreak: "break-word",
-           }}
-         >
-           {model.api_base}
-         </TableCell>
-       )}
-       <TableCell
            style={{
-             maxWidth: "200px",
+             maxWidth: "150px",
              whiteSpace: "normal",
              wordBreak: "break-word",
            }}
          >
-           <Accordion>
-             <AccordionHeader>
-               <Text>Litellm params</Text>
-             </AccordionHeader>
-             <AccordionBody>
-               <pre>
-                 {JSON.stringify(
-                   model.cleanedLitellmParams,
-                   null,
-                   2
-                 )}
+           <Tooltip title={model && model.api_base}>
+             <pre
+               style={{
+                 maxWidth: "150px",
+                 whiteSpace: "normal",
+                 wordBreak: "break-word",
+                 fontSize: "10px",
+               }}
+               title={model && model.api_base ? model.api_base : ""}
+             >
+               {model && model.api_base ? model.api_base.slice(0, 20) : "-"}
              </pre>
-             </AccordionBody>
-           </Accordion>
+           </Tooltip>
          </TableCell>

+       )}
        <TableCell
          style={{
            maxWidth: "80px",

@@ -1196,6 +1230,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            wordBreak: "break-word",
          }}
        >
+         <pre style={{ fontSize: "10px" }}>
          {model.input_cost
            ? model.input_cost
            : model.litellm_params.input_cost_per_token

@@ -1205,6 +1240,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
              ) * 1000000
            ).toFixed(2)
            : null}
+         </pre>
        </TableCell>
        <TableCell
          style={{

@@ -1213,6 +1249,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
            wordBreak: "break-word",
          }}
        >
+         <pre style={{ fontSize: "10px" }}>
          {model.output_cost
            ? model.output_cost
            : model.litellm_params.output_cost_per_token

@@ -1222,17 +1259,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
              ) * 1000000
            ).toFixed(2)
            : null}
+         </pre>
        </TableCell>
-       <TableCell
-         style={{
-           maxWidth: "120px",
-           whiteSpace: "normal",
-           wordBreak: "break-word",
-         }}
-       >
-         <p style={{ fontSize: "10px" }}>
-           Max Tokens: {model.max_tokens} <br></br>
-           Max Input Tokens: {model.max_input_tokens}
+       <TableCell>
+         <p style={{ fontSize: "10px" }}>
+           {
+             premiumUser ? formatCreatedAt(model.model_info.created_at) || "-" : "-"
+           }
+         </p>
+       </TableCell>
+       <TableCell>
+         <p style={{ fontSize: "10px" }}>
+           {
+             premiumUser ? model.model_info.created_by || "-" : "-"
+           }
          </p>
        </TableCell>
        <TableCell
@@ -1248,7 +1289,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
          size="xs"
          className="text-white"
        >
-         <p style={{ fontSize: "10px" }}>DB Model</p>
+         <p style={{ fontSize: "8px" }}>DB Model</p>
        </Badge>
      ) : (
        <Badge

@@ -1256,26 +1297,42 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
          size="xs"
          className="text-black"
        >
-         <p style={{ fontSize: "10px" }}>Config Model</p>
+         <p style={{ fontSize: "8px" }}>Config Model</p>
        </Badge>
      )}
    </TableCell>
    <TableCell
      style={{
-       maxWidth: "100px",
+       maxWidth: "150px",
        whiteSpace: "normal",
        wordBreak: "break-word",
      }}
    >
+     <Grid numItems={3}>
+       <Col>
+         <Icon
+           icon={InformationCircleIcon}
+           size="sm"
+           onClick={() => handleInfoClick(model)}
+         />
+       </Col>
+       <Col>
          <Icon
            icon={PencilAltIcon}
            size="sm"
            onClick={() => handleEditClick(model)}
          />
+       </Col>
+
+       <Col>
          <DeleteModelButton
            modelID={model.model_info.id}
            accessToken={accessToken}
          />
+       </Col>
+
+     </Grid>
+
    </TableCell>
  </TableRow>
))}

@@ -1289,6 +1346,20 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
    model={selectedModel}
    onSubmit={handleEditSubmit}
  />
+ <Modal
+   title={selectedModel && selectedModel.model_name}
+   visible={infoModalVisible}
+   width={800}
+   footer={null}
+   onCancel={handleInfoCancel}
+ >
+
+   <Title>Model Info</Title>
+   <SyntaxHighlighter language="json" >
+     {selectedModel && JSON.stringify(selectedModel, null, 2)}
+   </SyntaxHighlighter>
+
+ </Modal>
  </TabPanel>
  <TabPanel className="h-full">
    <Title2 level={2}>Add new model</Title2>
@ -1,20 +1,26 @@
|
||||||
import React, { useEffect, useState } from 'react';
|
import React, { useEffect, useState } from "react";
|
||||||
|
|
||||||
import { modelHubCall } from "./networking";
|
import { modelHubCall } from "./networking";
|
||||||
|
|
||||||
import { Card, Text, Title, Grid, Button, Badge, Tab,
|
import {
|
||||||
TabGroup,
|
Card,
|
||||||
TabList,
|
Text,
|
||||||
TabPanel,
|
Title,
|
||||||
TabPanels, } from "@tremor/react";
|
Grid,
|
||||||
|
Button,
|
||||||
|
Badge,
|
||||||
|
Tab,
|
||||||
|
TabGroup,
|
||||||
|
TabList,
|
||||||
|
TabPanel,
|
||||||
|
TabPanels,
|
||||||
|
} from "@tremor/react";
|
||||||
|
|
||||||
import { RightOutlined, CopyOutlined } from '@ant-design/icons';
|
import { RightOutlined, CopyOutlined } from "@ant-design/icons";
|
||||||
|
|
||||||
import { Modal, Tooltip } from 'antd';
|
import { Modal, Tooltip } from "antd";
|
||||||
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
interface ModelHubProps {
|
interface ModelHubProps {
|
||||||
userID: string | null;
|
userID: string | null;
|
||||||
userRole: string | null;
|
userRole: string | null;
|
||||||
|
@ -22,7 +28,6 @@ interface ModelHubProps {
|
||||||
accessToken: string | null;
|
accessToken: string | null;
|
||||||
keys: any; // Replace with the appropriate type for 'keys' prop
|
keys: any; // Replace with the appropriate type for 'keys' prop
|
||||||
premiumUser: boolean;
|
premiumUser: boolean;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ModelInfo {
|
interface ModelInfo {
|
||||||
|
@ -32,15 +37,13 @@ interface ModelInfo {
|
||||||
supports_vision: boolean;
|
supports_vision: boolean;
|
||||||
max_input_tokens?: number;
|
max_input_tokens?: number;
|
||||||
max_output_tokens?: number;
|
max_output_tokens?: number;
|
||||||
|
supported_openai_params?: string[];
|
||||||
// Add other properties if needed
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
// Add other properties if needed
|
||||||
|
}
|
||||||
|
|
||||||
const ModelHub: React.FC<ModelHubProps> = ({
|
const ModelHub: React.FC<ModelHubProps> = ({
|
||||||
|
|
||||||
userID,
|
userID,
|
||||||
|
|
||||||
userRole,
|
userRole,
|
||||||
|
@@ -52,140 +55,80 @@ const ModelHub: React.FC<ModelHubProps> = ({
  keys,
  premiumUser,
}) => {
  const [modelHubData, setModelHubData] = useState<ModelInfo[] | null>(null);
  const [isModalVisible, setIsModalVisible] = useState(false);
  const [selectedModel, setSelectedModel] = useState<null | ModelInfo>(null);

  useEffect(() => {
    if (!accessToken || !token || !userRole || !userID) {
      return;
    }

    const fetchData = async () => {
      try {
        const _modelHubData = await modelHubCall(accessToken, userID, userRole);
        console.log("ModelHubData:", _modelHubData);
        setModelHubData(_modelHubData.data);
      } catch (error) {
        console.error("There was an error fetching the model data", error);
      }
    };

    fetchData();
  }, [accessToken, token, userRole, userID]);

  const showModal = (model: ModelInfo) => {
    setSelectedModel(model);
    setIsModalVisible(true);
  };

  const handleOk = () => {
    setIsModalVisible(false);
    setSelectedModel(null);
  };

  const handleCancel = () => {
    setIsModalVisible(false);
    setSelectedModel(null);
  };

  const copyToClipboard = (text: string) => {
    navigator.clipboard.writeText(text);
  };

  return (
    <div>
      <div className="w-full m-2 mt-2 p-8">
-       <div className="relative w-full">
-       </div>
-       <div className='flex items-center'>
-         <Title className='ml-8 text-center '>Model Hub</Title>
-         <Button className='ml-4'>
-           <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
-             ✨ Share
-           </a>
-         </Button>
-       </div>
+       <div className="relative w-full"></div>
+
+       <div className="flex items-center">
+         <Title className="ml-8 text-center ">Model Hub</Title>
+         <Button className="ml-4">
+           <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
+             ✨ Make Public
+           </a>
+         </Button>
+       </div>

      <div className="grid grid-cols-2 gap-6 sm:grid-cols-3 lg:grid-cols-4">
-       {modelHubData && modelHubData.map((model: ModelInfo) => (
-         <Card key={model.model_group} className="mt-5 mx-8">
-           <pre className='flex justify-between'>
+       {modelHubData &&
+         modelHubData.map((model: ModelInfo) => (
+           <Card
+             key={model.model_group}
+             className="mt-5 mx-8"
+           >
+             <pre className="flex justify-between">
              <Title>{model.model_group}</Title>
              <Tooltip title={model.model_group}>
-               <CopyOutlined onClick={() => copyToClipboard(model.model_group)} style={{ cursor: 'pointer', marginRight: '10px' }} />
+               <CopyOutlined
+                 onClick={() => copyToClipboard(model.model_group)}
+                 style={{ cursor: "pointer", marginRight: "10px" }}
+               />
              </Tooltip>
            </pre>
            <div className='my-5'>
              <Text>Mode: {model.mode}</Text>
@@ -193,52 +136,37 @@ const ModelHub: React.FC<ModelHubProps> = ({
              <Text>Supports Vision: {model?.supports_vision == true ? "Yes" : "No"}</Text>
              <Text>Max Input Tokens: {model?.max_input_tokens ? model?.max_input_tokens : "N/A"}</Text>
              <Text>Max Output Tokens: {model?.max_output_tokens ? model?.max_output_tokens : "N/A"}</Text>
            </div>
-           <div style={{ marginTop: 'auto', textAlign: 'right' }}>
-             <a href="#" onClick={() => showModal(model)} style={{ color: '#1890ff', fontSize: 'smaller' }}>
-               View more <RightOutlined />
-             </a>
+           <div style={{ marginTop: "auto", textAlign: "right" }}>
+             <a
+               href="#"
+               onClick={() => showModal(model)}
+               style={{ color: "#1890ff", fontSize: "smaller" }}
+             >
+               View more <RightOutlined />
+             </a>
            </div>
          </Card>
        ))}
      </div>
    </div>

    <Modal
-     title="Model Usage"
+     title={selectedModel && selectedModel.model_group ? selectedModel.model_group : "Unknown Model"}
      width={800}
      visible={isModalVisible}
      footer={null}
      onOk={handleOk}
      onCancel={handleCancel}
    >
      {selectedModel && (
        <div>
-         <p><strong>Model Name:</strong> {selectedModel.model_group}</p>
+         <p className='mb-4'><strong>Model Information & Usage</strong></p>
          <TabGroup>
            <TabList>
              <Tab>OpenAI Python SDK</Tab>
+             <Tab>Supported OpenAI Params</Tab>
              <Tab>LlamaIndex</Tab>
              <Tab>Langchain Py</Tab>
            </TabList>
@@ -267,8 +195,13 @@ print(response)
            </SyntaxHighlighter>
          </TabPanel>
          <TabPanel>
            <SyntaxHighlighter language="python">
+             {`${selectedModel.supported_openai_params?.map((param) => `${param}\n`).join('')}`}
+           </SyntaxHighlighter>
+         </TabPanel>
+         <TabPanel>
+           <SyntaxHighlighter language="python">
              {`
import os, dotenv

from llama_index.llms import AzureOpenAI
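As a side note on the new "Supported OpenAI Params" tab added above: it renders the per-model parameter list as a newline-separated block inside the syntax highlighter. A minimal sketch of that formatting step follows; the helper name and example values are illustrative, not part of the source.

```typescript
// Join a model's supported_openai_params into the newline-separated text
// shown in the new tab; an absent list simply renders as an empty block.
function formatSupportedParams(params?: string[]): string {
  return (params ?? []).map((param) => `${param}\n`).join("");
}

// Example with hypothetical values:
console.log(formatSupportedParams(["temperature", "max_tokens", "stream"]));
// -> "temperature\nmax_tokens\nstream\n"
```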
@@ -300,11 +233,11 @@ response = query_engine.query("What did the author do growing up?")
print(response)

`}
            </SyntaxHighlighter>
          </TabPanel>
          <TabPanel>
            <SyntaxHighlighter language="python">
              {`
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
@@ -332,27 +265,19 @@ response = chat(messages)
print(response)

`}
            </SyntaxHighlighter>
          </TabPanel>
        </TabPanels>
      </TabGroup>

      {/* <p><strong>Additional Params:</strong> {JSON.stringify(selectedModel.litellm_params)}</p> */}

      {/* Add other model details here */}
    </div>
  )}
  </Modal>
  </div>
  );
};

export default ModelHub;
@@ -1,6 +1,10 @@
"use client";
import React, { useState, useEffect } from "react";
- import { userInfoCall, modelAvailableCall, getTotalSpendCall } from "./networking";
+ import {
+   userInfoCall,
+   modelAvailableCall,
+   getTotalSpendCall,
+ } from "./networking";
import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table";
@@ -19,7 +23,6 @@ type UserSpendData = {
  max_budget?: number | null;
};

interface UserDashboardProps {
  userID: string | null;
  userRole: string | null;
@@ -35,8 +38,8 @@ interface UserDashboardProps {
type TeamInterface = {
  models: any[];
  team_id: null;
- team_alias: String
- }
+ team_alias: String;
+ };

const UserDashboard: React.FC<UserDashboardProps> = ({
  userID,
@@ -63,10 +66,10 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
  const [teamSpend, setTeamSpend] = useState<number | null>(null);
  const [userModels, setUserModels] = useState<string[]>([]);
  const defaultTeam: TeamInterface = {
-   "models": [],
-   "team_alias": "Default Team",
-   "team_id": null
- }
+   models: [],
+   team_alias: "Default Team",
+   team_id: null,
+ };
  const [selectedTeam, setSelectedTeam] = useState<any | null>(
    teams ? teams[0] : defaultTeam
  );
@@ -137,7 +140,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
    } else {
      const fetchData = async () => {
        try {
-         const response = await userInfoCall(accessToken, userID, userRole, false, null, null);
+         const response = await userInfoCall(
+           accessToken,
+           userID,
+           userRole,
+           false,
+           null,
+           null
+         );
          console.log(
            `received teams in user dashboard: ${Object.keys(
              response
@@ -152,12 +162,12 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
          }
          setKeys(response["keys"]); // Assuming this is the correct path to your data
          setTeams(response["teams"]);
-         const teamsArray = [...response['teams']];
+         const teamsArray = [...response["teams"]];
          if (teamsArray.length > 0) {
            console.log(`response['teams']: ${teamsArray}`);
            setSelectedTeam(teamsArray[0]);
          } else {
            setSelectedTeam(defaultTeam);
          }
          sessionStorage.setItem(
            "userData" + userID,
@@ -194,22 +204,30 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
        fetchData();
      }
    }
  }, [userID, token, accessToken, keys, userRole]);

  useEffect(() => {
    // This code will run every time selectedTeam changes
-   if (keys !== null && selectedTeam !== null && selectedTeam !== undefined) {
+   if (
+     keys !== null &&
+     selectedTeam !== null &&
+     selectedTeam !== undefined &&
+     selectedTeam.team_id !== null
+   ) {
      let sum = 0;
      for (const key of keys) {
-       if (selectedTeam.hasOwnProperty('team_id') && key.team_id !== null && key.team_id === selectedTeam.team_id) {
+       if (
+         selectedTeam.hasOwnProperty("team_id") &&
+         key.team_id !== null &&
+         key.team_id === selectedTeam.team_id
+       ) {
          sum += key.spend;
        }
      }
      setTeamSpend(sum);
    } else if (keys !== null) {
      // sum the keys which don't have team-id set (default team)
-     let sum = 0
+     let sum = 0;
      for (const key of keys) {
        sum += key.spend;
      }
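To make the reworked effect above easier to follow, here is a small standalone sketch of the spend aggregation it performs. The `KeyInfo` and `TeamInfo` shapes are hypothetical and reduced to the fields the loop actually reads; the real objects carry more data, and the component additionally guards on `hasOwnProperty("team_id")`.

```typescript
interface KeyInfo {
  team_id: string | null;
  spend: number;
}

interface TeamInfo {
  team_id: string | null;
}

// For a real team (team_id set), only keys assigned to that team are counted;
// otherwise the dashboard currently sums every key it was handed
// (the "default team" branch of the effect above).
function sumTeamSpend(keys: KeyInfo[], selectedTeam: TeamInfo): number {
  let sum = 0;
  for (const key of keys) {
    if (selectedTeam.team_id !== null) {
      if (key.team_id !== null && key.team_id === selectedTeam.team_id) {
        sum += key.spend;
      }
    } else {
      sum += key.spend;
    }
  }
  return sum;
}
```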
@@ -245,9 +263,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
    }

    console.log("inside user dashboard, selected team", selectedTeam);
-   console.log(`teamSpend: ${teamSpend}`)
  return (
    <div className="w-full mx-4">
      <Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
        <Col numColSpan={1}>
          <ViewUserTeam
@@ -261,8 +278,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
            userRole={userRole}
            accessToken={accessToken}
            userSpend={teamSpend}
-           selectedTeam = {selectedTeam ? selectedTeam : null}
+           selectedTeam={selectedTeam ? selectedTeam : null}
          />

          <ViewKeyTable
@@ -283,11 +299,15 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
            data={keys}
            setData={setKeys}
          />
-         <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
+         <DashboardTeam
+           teams={teams}
+           setSelectedTeam={setSelectedTeam}
+           userRole={userRole}
+         />
        </Col>
      </Grid>
    </div>
  );
};

export default UserDashboard;