Merge branch 'main' into litellm_custom_pricing_ui_fix

Krish Dholakia 2024-05-27 18:35:47 -07:00 committed by GitHub
commit dec482031b
32 changed files with 638 additions and 859 deletions


@ -11,7 +11,7 @@ Anthropic, OpenAI, Mistral, Llama and Gemini LLMs are Supported on Clarifai.
To obtain your Clarifai Personal access token, follow this [link](https://docs.clarifai.com/clarifai-basics/authentication/personal-access-tokens/). Optionally, the PAT can also be passed to the `completion` function.
```python
os.environ["CALRIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
os.environ["CLARIFAI_API_KEY"] = "YOUR_CLARIFAI_PAT" # CLARIFAI_PAT
```
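For example, a minimal sketch of passing the PAT directly to `completion` instead of relying on the environment variable (the model id below is a placeholder, not taken from this diff):

```python
# Sketch: pass the Clarifai PAT directly via `api_key` (model id is illustrative).
import os
from litellm import completion

response = completion(
    model="clarifai/openai.chat-completion.GPT-4",  # placeholder Clarifai model id
    messages=[{"role": "user", "content": "Hello!"}],
    api_key=os.environ.get("CLARIFAI_API_KEY"),  # optional if the env var is already set
)
print(response.choices[0].message.content)
```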
## Usage


@ -21,6 +21,7 @@ Features:
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
- ✅ Audit Logs for `Created At, Created By` when Models Added
## Content Moderation


@ -9,12 +9,3 @@ Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
[![Chat on WhatsApp](https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square)](https://wa.link/huol9n) [![Chat on Discord](https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square)](https://discord.gg/wuPM9dRgDw)
## Stable Version
If you're running into problems with installation / Usage
Use the stable version of litellm
```shell
pip install litellm==0.1.819
```


@ -766,7 +766,12 @@ from .llms.bedrock import (
AmazonMistralConfig,
AmazonBedrockGlobalConfig,
)
from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig, MistralConfig
from .llms.openai import (
OpenAIConfig,
OpenAITextCompletionConfig,
MistralConfig,
DeepInfraConfig,
)
from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
from .llms.watsonx import IBMWatsonXAIConfig
from .main import * # type: ignore


@ -157,6 +157,101 @@ class MistralConfig:
)
if param == "seed":
optional_params["extra_body"] = {"random_seed": value}
if param == "response_format":
optional_params["response_format"] = value
return optional_params
class DeepInfraConfig:
"""
Reference: https://deepinfra.com/docs/advanced/openai_api
The class `DeepInfraConfig` provides configuration for DeepInfra's Chat Completions API interface. Below are the parameters:
"""
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
def get_supported_openai_params(self):
return [
"frequency_penalty",
"function_call",
"functions",
"logit_bias",
"max_tokens",
"n",
"presence_penalty",
"stop",
"temperature",
"top_p",
"response_format",
"tools",
"tool_choice",
]
def map_openai_params(
self, non_default_params: dict, optional_params: dict, model: str
):
supported_openai_params = self.get_supported_openai_params()
for param, value in non_default_params.items():
if (
param == "temperature"
and value == 0
and model == "mistralai/Mistral-7B-Instruct-v0.1"
): # this model does not support temperature == 0
value = 0.0001 # close to 0
if param in supported_openai_params:
optional_params[param] = value
return optional_params
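A brief, hedged sketch of how the new config is exercised (this mirrors the `get_optional_params` wiring later in this diff; the parameter values are made up for illustration):

```python
# Sketch: map OpenAI-style params for a DeepInfra model via the new DeepInfraConfig.
import litellm

config = litellm.DeepInfraConfig()
optional_params = config.map_openai_params(
    non_default_params={"temperature": 0, "max_tokens": 256},
    optional_params={},
    model="mistralai/Mistral-7B-Instruct-v0.1",
)
# temperature == 0 is nudged to 0.0001 for this model; max_tokens passes through unchanged.
print(optional_params)  # {'temperature': 0.0001, 'max_tokens': 256}
```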
@ -197,6 +292,7 @@ class OpenAIConfig:
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
def __init__(
self,
@ -210,6 +306,7 @@ class OpenAIConfig:
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-e266cb0126026d40.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dYIEEO-62OCgyckEhgBd-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-e266cb0126026d40.js"],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dYIEEO-62OCgyckEhgBd-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -984,10 +984,6 @@ class LiteLLM_VerificationToken(LiteLLMBase):
org_id: Optional[str] = None # org id for a given key
# hidden params used for parallel request limiting, not required to create a token
user_id_rate_limits: Optional[dict] = None
team_id_rate_limits: Optional[dict] = None
class Config:
protected_namespaces = ()


@ -164,8 +164,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# check if REQUEST ALLOWED for user_id
user_id = user_api_key_dict.user_id
if user_id is not None:
_user_id_rate_limits = user_api_key_dict.user_id_rate_limits
_user_id_rate_limits = await self.user_api_key_cache.async_get_cache(
key=user_id
)
# get user tpm/rpm limits
if _user_id_rate_limits is not None and isinstance(
_user_id_rate_limits, dict
@ -196,13 +197,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
## get team tpm/rpm limits
team_id = user_api_key_dict.team_id
if team_id is not None:
team_tpm_limit = getattr(user_api_key_dict, "team_tpm_limit", sys.maxsize)
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
team_rpm_limit = getattr(user_api_key_dict, "team_rpm_limit", sys.maxsize)
if team_rpm_limit is None:
team_rpm_limit = sys.maxsize
team_tpm_limit = user_api_key_dict.team_tpm_limit
team_rpm_limit = user_api_key_dict.team_rpm_limit
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
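With this change the handler reads per-user tpm/rpm limits from the shared `user_api_key_cache` (keyed by `user_id`) instead of `user_api_key_dict.user_id_rate_limits`. A minimal sketch of seeding that cache, mirroring the updated tests later in this diff (user id and limits are illustrative):

```python
# Sketch: per-user limits are now looked up in the user_api_key_cache by user_id.
from litellm.caching import DualCache

user_api_key_cache = DualCache()
user_api_key_cache.set_cache(key="test-user", value={"tpm_limit": 9, "rpm_limit": 10})
```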


@ -1,379 +0,0 @@
# What is this?
## Checks TPM/RPM Limits for a key/user/team on the proxy
## Works with Redis - if given
from typing import Optional, Literal
import litellm, traceback, sys
from litellm.caching import DualCache, RedisCache
from litellm.proxy._types import (
UserAPIKeyAuth,
LiteLLM_VerificationTokenView,
LiteLLM_UserTable,
LiteLLM_TeamTable,
)
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm import ModelResponse
from datetime import datetime
class _PROXY_MaxTPMRPMLimiter(CustomLogger):
user_api_key_cache = None
# Class variables or attributes
def __init__(self, internal_cache: Optional[DualCache]):
if internal_cache is None:
self.internal_cache = DualCache()
else:
self.internal_cache = internal_cache
def print_verbose(self, print_statement):
try:
verbose_proxy_logger.debug(print_statement)
if litellm.set_verbose:
print(print_statement) # noqa
except:
pass
## check if admin has set tpm/rpm limits for this key/user/team
def _check_limits_set(
self,
user_api_key_cache: DualCache,
key: Optional[str],
user_id: Optional[str],
team_id: Optional[str],
) -> bool:
## key
if key is not None:
key_val = user_api_key_cache.get_cache(key=key)
if isinstance(key_val, dict):
key_val = LiteLLM_VerificationTokenView(**key_val)
if isinstance(key_val, LiteLLM_VerificationTokenView):
user_api_key_tpm_limit = key_val.tpm_limit
user_api_key_rpm_limit = key_val.rpm_limit
if (
user_api_key_tpm_limit is not None
or user_api_key_rpm_limit is not None
):
return True
## team
if team_id is not None:
team_val = user_api_key_cache.get_cache(key=team_id)
if isinstance(team_val, dict):
team_val = LiteLLM_TeamTable(**team_val)
if isinstance(team_val, LiteLLM_TeamTable):
team_tpm_limit = team_val.tpm_limit
team_rpm_limit = team_val.rpm_limit
if team_tpm_limit is not None or team_rpm_limit is not None:
return True
## user
if user_id is not None:
user_val = user_api_key_cache.get_cache(key=user_id)
if isinstance(user_val, dict):
user_val = LiteLLM_UserTable(**user_val)
if isinstance(user_val, LiteLLM_UserTable):
user_tpm_limit = user_val.tpm_limit
user_rpm_limit = user_val.rpm_limit
if user_tpm_limit is not None or user_rpm_limit is not None:
return True
return False
async def check_key_in_limits(
self,
user_api_key_dict: UserAPIKeyAuth,
current_minute_dict: dict,
tpm_limit: int,
rpm_limit: int,
request_count_api_key: str,
type: Literal["key", "user", "team"],
):
if type == "key" and user_api_key_dict.api_key is not None:
current = current_minute_dict["key"].get(user_api_key_dict.api_key, None)
elif type == "user" and user_api_key_dict.user_id is not None:
current = current_minute_dict["user"].get(user_api_key_dict.user_id, None)
elif type == "team" and user_api_key_dict.team_id is not None:
current = current_minute_dict["team"].get(user_api_key_dict.team_id, None)
else:
return
if current is None:
if tpm_limit == 0 or rpm_limit == 0:
# base case
raise HTTPException(
status_code=429, detail="Max tpm/rpm limit reached."
)
elif current["current_tpm"] < tpm_limit and current["current_rpm"] < rpm_limit:
pass
else:
raise HTTPException(status_code=429, detail="Max tpm/rpm limit reached.")
async def async_pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
cache: DualCache,
data: dict,
call_type: str,
):
self.print_verbose(
f"Inside Max TPM/RPM Limiter Pre-Call Hook - {user_api_key_dict}"
)
api_key = user_api_key_dict.api_key
# check if REQUEST ALLOWED for user_id
user_id = user_api_key_dict.user_id
## get team tpm/rpm limits
team_id = user_api_key_dict.team_id
self.user_api_key_cache = cache
_set_limits = self._check_limits_set(
user_api_key_cache=cache, key=api_key, user_id=user_id, team_id=team_id
)
self.print_verbose(f"_set_limits: {_set_limits}")
if _set_limits == False:
return
# ------------
# Setup values
# ------------
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
precise_minute = f"{current_date}-{current_hour}-{current_minute}"
cache_key = "usage:{}".format(precise_minute)
current_minute_dict = await self.internal_cache.async_get_cache(
key=cache_key
) # {"usage:{curr_minute}": {"key": {<api_key>: {"current_requests": 1, "current_tpm": 1, "current_rpm": 10}}}}
if current_minute_dict is None:
current_minute_dict = {"key": {}, "user": {}, "team": {}}
if api_key is not None:
tpm_limit = getattr(user_api_key_dict, "tpm_limit", sys.maxsize)
if tpm_limit is None:
tpm_limit = sys.maxsize
rpm_limit = getattr(user_api_key_dict, "rpm_limit", sys.maxsize)
if rpm_limit is None:
rpm_limit = sys.maxsize
request_count_api_key = f"{api_key}::{precise_minute}::request_count"
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=tpm_limit,
rpm_limit=rpm_limit,
type="key",
)
if user_id is not None:
_user_id_rate_limits = user_api_key_dict.user_id_rate_limits
# get user tpm/rpm limits
if _user_id_rate_limits is not None and isinstance(
_user_id_rate_limits, dict
):
user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
if user_tpm_limit is None:
user_tpm_limit = sys.maxsize
if user_rpm_limit is None:
user_rpm_limit = sys.maxsize
# now do the same tpm/rpm checks
request_count_api_key = f"{user_id}::{precise_minute}::request_count"
# print(f"Checking if {request_count_api_key} is allowed to make request for minute {precise_minute}")
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=user_tpm_limit,
rpm_limit=user_rpm_limit,
type="user",
)
# TEAM RATE LIMITS
if team_id is not None:
team_tpm_limit = getattr(user_api_key_dict, "team_tpm_limit", sys.maxsize)
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
team_rpm_limit = getattr(user_api_key_dict, "team_rpm_limit", sys.maxsize)
if team_rpm_limit is None:
team_rpm_limit = sys.maxsize
if team_tpm_limit is None:
team_tpm_limit = sys.maxsize
if team_rpm_limit is None:
team_rpm_limit = sys.maxsize
# now do the same tpm/rpm checks
request_count_api_key = f"{team_id}::{precise_minute}::request_count"
# print(f"Checking if {request_count_api_key} is allowed to make request for minute {precise_minute}")
await self.check_key_in_limits(
user_api_key_dict=user_api_key_dict,
current_minute_dict=current_minute_dict,
request_count_api_key=request_count_api_key,
tpm_limit=team_tpm_limit,
rpm_limit=team_rpm_limit,
type="team",
)
return
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
self.print_verbose(f"INSIDE TPM RPM Limiter ASYNC SUCCESS LOGGING")
user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"]
user_api_key_user_id = kwargs["litellm_params"]["metadata"].get(
"user_api_key_user_id", None
)
user_api_key_team_id = kwargs["litellm_params"]["metadata"].get(
"user_api_key_team_id", None
)
_limits_set = self._check_limits_set(
user_api_key_cache=self.user_api_key_cache,
key=user_api_key,
user_id=user_api_key_user_id,
team_id=user_api_key_team_id,
)
if _limits_set == False: # don't waste cache calls if no tpm/rpm limits set
return
# ------------
# Setup values
# ------------
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
precise_minute = f"{current_date}-{current_hour}-{current_minute}"
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
"""
- get value from redis
- increment requests + 1
- increment tpm + 1
- increment rpm + 1
- update value in-memory + redis
"""
cache_key = "usage:{}".format(precise_minute)
if (
self.internal_cache.redis_cache is not None
): # get straight from redis if possible
current_minute_dict = (
await self.internal_cache.redis_cache.async_get_cache(
key=cache_key,
)
) # {"usage:{current_minute}": {"key": {}, "team": {}, "user": {}}}
else:
current_minute_dict = await self.internal_cache.async_get_cache(
key=cache_key,
)
if current_minute_dict is None:
current_minute_dict = {"key": {}, "user": {}, "team": {}}
_cache_updated = False # check if a cache update is required. prevent unnecessary rewrites.
# ------------
# Update usage - API Key
# ------------
if user_api_key is not None:
_cache_updated = True
## API KEY ##
if user_api_key in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["key"][user_api_key] = new_val
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
# ------------
# Update usage - User
# ------------
if user_api_key_user_id is not None:
_cache_updated = True
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
if user_api_key_user_id in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key_user_id]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["user"][user_api_key_user_id] = new_val
# ------------
# Update usage - Team
# ------------
if user_api_key_team_id is not None:
_cache_updated = True
total_tokens = 0
if isinstance(response_obj, ModelResponse):
total_tokens = response_obj.usage.total_tokens
if user_api_key_team_id in current_minute_dict["key"]:
current_key_usage = current_minute_dict["key"][user_api_key_team_id]
new_val = {
"current_tpm": current_key_usage["current_tpm"] + total_tokens,
"current_rpm": current_key_usage["current_rpm"] + 1,
}
else:
new_val = {
"current_tpm": total_tokens,
"current_rpm": 1,
}
current_minute_dict["team"][user_api_key_team_id] = new_val
if _cache_updated == True:
await self.internal_cache.async_set_cache(
key=cache_key, value=current_minute_dict
)
except Exception as e:
self.print_verbose("{}\n{}".format(e, traceback.format_exc())) # noqa


@ -397,6 +397,7 @@ def _get_pydantic_json_dict(pydantic_obj: BaseModel) -> dict:
def get_custom_headers(
*,
user_api_key_dict: UserAPIKeyAuth,
model_id: Optional[str] = None,
cache_key: Optional[str] = None,
api_base: Optional[str] = None,
@ -410,6 +411,8 @@ def get_custom_headers(
"x-litellm-model-api-base": api_base,
"x-litellm-version": version,
"x-litellm-model-region": model_region,
"x-litellm-key-tpm-limit": str(user_api_key_dict.tpm_limit),
"x-litellm-key-rpm-limit": str(user_api_key_dict.rpm_limit),
}
try:
return {
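The two new headers above surface the key's tpm/rpm limits on responses from endpoints that pass through `get_custom_headers`. A hedged client-side sketch of reading them (URL, key, and model are placeholders, not values from this diff):

```python
# Sketch: inspect the new x-litellm-key-* rate-limit headers on a proxy response.
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.headers.get("x-litellm-key-tpm-limit"))
print(resp.headers.get("x-litellm-key-rpm-limit"))
```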
@ -2787,6 +2790,13 @@ class ProxyConfig:
model.model_info["id"] = _id
model.model_info["db_model"] = True
if premium_user is True:
# seeing "created_at", "updated_at", "created_by", "updated_by" is a LiteLLM Enterprise Feature
model.model_info["created_at"] = getattr(model, "created_at", None)
model.model_info["updated_at"] = getattr(model, "updated_at", None)
model.model_info["created_by"] = getattr(model, "created_by", None)
model.model_info["updated_by"] = getattr(model, "updated_by", None)
if model.model_info is not None and isinstance(model.model_info, dict):
if "id" not in model.model_info:
model.model_info["id"] = model.model_id
@ -3072,10 +3082,9 @@ class ProxyConfig:
try:
if master_key is None or not isinstance(master_key, str):
raise Exception(
raise ValueError(
f"Master key is not initialized or formatted. master_key={master_key}"
)
verbose_proxy_logger.debug(f"llm_router: {llm_router}")
new_models = await prisma_client.db.litellm_proxymodeltable.find_many()
# update llm router
await self._update_llm_router(
@ -4059,6 +4068,7 @@ async def chat_completion(
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
custom_headers = get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4078,6 +4088,7 @@ async def chat_completion(
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4298,6 +4309,7 @@ async def completion(
"stream" in data and data["stream"] == True
): # use generate_responses to stream responses
custom_headers = get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4316,6 +4328,7 @@ async def completion(
)
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4565,6 +4578,7 @@ async def embeddings(
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4748,6 +4762,7 @@ async def image_generation(
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -4949,6 +4964,7 @@ async def audio_transcriptions(
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -5132,6 +5148,7 @@ async def moderations(
fastapi_response.headers.update(
get_custom_headers(
user_api_key_dict=user_api_key_dict,
model_id=model_id,
cache_key=cache_key,
api_base=api_base,
@ -6083,7 +6100,7 @@ async def get_global_activity_model(
sql_query = """
SELECT
model,
model_group AS model,
date_trunc('day', "startTime") AS date,
COUNT(*) AS api_requests,
SUM(total_tokens) AS total_tokens


@ -35,7 +35,6 @@ from litellm import (
)
from litellm.utils import ModelResponseIterator
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.db.base_client import CustomDB
@ -81,9 +80,6 @@ class ProxyLogging:
self.call_details["user_api_key_cache"] = user_api_key_cache
self.internal_usage_cache = DualCache()
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler()
self.max_tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(
internal_cache=self.internal_usage_cache
)
self.max_budget_limiter = _PROXY_MaxBudgetLimiter()
self.cache_control_check = _PROXY_CacheControlCheck()
self.alerting: Optional[List] = None
@ -144,7 +140,6 @@ class ProxyLogging:
print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
self.service_logging_obj = ServiceLogging()
litellm.callbacks.append(self.max_parallel_request_limiter)
litellm.callbacks.append(self.max_tpm_rpm_limiter)
litellm.callbacks.append(self.max_budget_limiter)
litellm.callbacks.append(self.cache_control_check)
litellm.callbacks.append(self.service_logging_obj)


@ -38,6 +38,7 @@ from litellm.utils import (
import copy
from litellm._logging import verbose_router_logger
import logging
from litellm.types.utils import ModelInfo as ModelMapInfo
from litellm.types.router import (
Deployment,
ModelInfo,
@ -349,17 +350,13 @@ class Router:
def validate_fallbacks(self, fallback_param: Optional[List]):
if fallback_param is None:
return
if len(fallback_param) > 0: # if set
## for dictionary in list, check if only 1 key in dict
for _dict in fallback_param:
assert isinstance(_dict, dict), "Item={}, not a dictionary".format(
_dict
)
assert (
len(_dict.keys()) == 1
), "Only 1 key allows in dictionary. You set={} for dict={}".format(
len(_dict.keys()), _dict
)
for fallback_dict in fallback_param:
if not isinstance(fallback_dict, dict):
raise ValueError(f"Item '{fallback_dict}' is not a dictionary.")
if len(fallback_dict) != 1:
raise ValueError(
f"Dictionary '{fallback_dict}' must have exactly one key, but has {len(fallback_dict)} keys.")
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
@ -3065,16 +3062,31 @@ class Router:
try:
model_info = litellm.get_model_info(model=litellm_params.model)
except Exception as e:
continue
model_info = None
# get llm provider
try:
model, llm_provider, _, _ = litellm.get_llm_provider(
model=litellm_params.model,
custom_llm_provider=litellm_params.custom_llm_provider,
)
except Exception as e:
except litellm.exceptions.BadRequestError as e:
continue
if model_info is None:
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=llm_provider
)
model_info = ModelMapInfo(
max_tokens=None,
max_input_tokens=None,
max_output_tokens=None,
input_cost_per_token=0,
output_cost_per_token=0,
litellm_provider=llm_provider,
mode="chat",
supported_openai_params=supported_openai_params,
)
if model_group_info is None:
model_group_info = ModelGroupInfo(
model_group=model_group, providers=[llm_provider], **model_info # type: ignore
@ -3089,18 +3101,26 @@ class Router:
# supports_function_calling == True
if llm_provider not in model_group_info.providers:
model_group_info.providers.append(llm_provider)
if model_info.get("max_input_tokens", None) is not None and (
if (
model_info.get("max_input_tokens", None) is not None
and model_info["max_input_tokens"] is not None
and (
model_group_info.max_input_tokens is None
or model_info["max_input_tokens"]
> model_group_info.max_input_tokens
)
):
model_group_info.max_input_tokens = model_info[
"max_input_tokens"
]
if model_info.get("max_output_tokens", None) is not None and (
if (
model_info.get("max_output_tokens", None) is not None
and model_info["max_output_tokens"] is not None
and (
model_group_info.max_output_tokens is None
or model_info["max_output_tokens"]
> model_group_info.max_output_tokens
)
):
model_group_info.max_output_tokens = model_info[
"max_output_tokens"
@ -3124,19 +3144,26 @@ class Router:
if (
model_info.get("supports_parallel_function_calling", None)
is not None
and model_info["supports_parallel_function_calling"] == True # type: ignore
and model_info["supports_parallel_function_calling"] is True # type: ignore
):
model_group_info.supports_parallel_function_calling = True
if (
model_info.get("supports_vision", None) is not None
and model_info["supports_vision"] == True # type: ignore
and model_info["supports_vision"] is True # type: ignore
):
model_group_info.supports_vision = True
if (
model_info.get("supports_function_calling", None) is not None
and model_info["supports_function_calling"] == True # type: ignore
and model_info["supports_function_calling"] is True # type: ignore
):
model_group_info.supports_function_calling = True
if (
model_info.get("supported_openai_params", None) is not None
and model_info["supported_openai_params"] is not None
):
model_group_info.supported_openai_params = model_info[
"supported_openai_params"
]
return model_group_info


@ -1,162 +1,163 @@
### REPLACED BY 'test_parallel_request_limiter.py' ###
# What is this?
## Unit tests for the max tpm / rpm limiter hook for proxy
import sys, os, asyncio, time, random
from datetime import datetime
import traceback
from dotenv import load_dotenv
from typing import Optional
# import sys, os, asyncio, time, random
# from datetime import datetime
# import traceback
# from dotenv import load_dotenv
# from typing import Optional
load_dotenv()
import os
# load_dotenv()
# import os
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import Router
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache, RedisCache
from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
from datetime import datetime
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path
# import pytest
# import litellm
# from litellm import Router
# from litellm.proxy.utils import ProxyLogging, hash_token
# from litellm.proxy._types import UserAPIKeyAuth
# from litellm.caching import DualCache, RedisCache
# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
# from datetime import datetime
@pytest.mark.asyncio
async def test_pre_call_hook_rpm_limits():
"""
Test if error raised on hitting rpm limits
"""
litellm.set_verbose = True
_api_key = hash_token("sk-12345")
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, tpm_limit=9, rpm_limit=1)
local_cache = DualCache()
# redis_usage_cache = RedisCache()
# @pytest.mark.asyncio
# async def test_pre_call_hook_rpm_limits():
# """
# Test if error raised on hitting rpm limits
# """
# litellm.set_verbose = True
# _api_key = hash_token("sk-12345")
# user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, tpm_limit=9, rpm_limit=1)
# local_cache = DualCache()
# # redis_usage_cache = RedisCache()
local_cache.set_cache(
key=_api_key, value={"api_key": _api_key, "tpm_limit": 9, "rpm_limit": 1}
)
# local_cache.set_cache(
# key=_api_key, value={"api_key": _api_key, "tpm_limit": 9, "rpm_limit": 1}
# )
tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=DualCache())
# tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=DualCache())
await tpm_rpm_limiter.async_pre_call_hook(
user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
)
# await tpm_rpm_limiter.async_pre_call_hook(
# user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
# )
kwargs = {"litellm_params": {"metadata": {"user_api_key": _api_key}}}
# kwargs = {"litellm_params": {"metadata": {"user_api_key": _api_key}}}
await tpm_rpm_limiter.async_log_success_event(
kwargs=kwargs,
response_obj="",
start_time="",
end_time="",
)
# await tpm_rpm_limiter.async_log_success_event(
# kwargs=kwargs,
# response_obj="",
# start_time="",
# end_time="",
# )
## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
# ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
try:
await tpm_rpm_limiter.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=local_cache,
data={},
call_type="",
)
# try:
# await tpm_rpm_limiter.async_pre_call_hook(
# user_api_key_dict=user_api_key_dict,
# cache=local_cache,
# data={},
# call_type="",
# )
pytest.fail(f"Expected call to fail")
except Exception as e:
assert e.status_code == 429
# pytest.fail(f"Expected call to fail")
# except Exception as e:
# assert e.status_code == 429
@pytest.mark.asyncio
async def test_pre_call_hook_team_rpm_limits(
_redis_usage_cache: Optional[RedisCache] = None,
):
"""
Test if error raised on hitting team rpm limits
"""
litellm.set_verbose = True
_api_key = "sk-12345"
_team_id = "unique-team-id"
_user_api_key_dict = {
"api_key": _api_key,
"max_parallel_requests": 1,
"tpm_limit": 9,
"rpm_limit": 10,
"team_rpm_limit": 1,
"team_id": _team_id,
}
user_api_key_dict = UserAPIKeyAuth(**_user_api_key_dict) # type: ignore
_api_key = hash_token(_api_key)
local_cache = DualCache()
local_cache.set_cache(key=_api_key, value=_user_api_key_dict)
internal_cache = DualCache(redis_cache=_redis_usage_cache)
tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=internal_cache)
await tpm_rpm_limiter.async_pre_call_hook(
user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
)
# @pytest.mark.asyncio
# async def test_pre_call_hook_team_rpm_limits(
# _redis_usage_cache: Optional[RedisCache] = None,
# ):
# """
# Test if error raised on hitting team rpm limits
# """
# litellm.set_verbose = True
# _api_key = "sk-12345"
# _team_id = "unique-team-id"
# _user_api_key_dict = {
# "api_key": _api_key,
# "max_parallel_requests": 1,
# "tpm_limit": 9,
# "rpm_limit": 10,
# "team_rpm_limit": 1,
# "team_id": _team_id,
# }
# user_api_key_dict = UserAPIKeyAuth(**_user_api_key_dict) # type: ignore
# _api_key = hash_token(_api_key)
# local_cache = DualCache()
# local_cache.set_cache(key=_api_key, value=_user_api_key_dict)
# internal_cache = DualCache(redis_cache=_redis_usage_cache)
# tpm_rpm_limiter = _PROXY_MaxTPMRPMLimiter(internal_cache=internal_cache)
# await tpm_rpm_limiter.async_pre_call_hook(
# user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
# )
kwargs = {
"litellm_params": {
"metadata": {"user_api_key": _api_key, "user_api_key_team_id": _team_id}
}
}
# kwargs = {
# "litellm_params": {
# "metadata": {"user_api_key": _api_key, "user_api_key_team_id": _team_id}
# }
# }
await tpm_rpm_limiter.async_log_success_event(
kwargs=kwargs,
response_obj="",
start_time="",
end_time="",
)
# await tpm_rpm_limiter.async_log_success_event(
# kwargs=kwargs,
# response_obj="",
# start_time="",
# end_time="",
# )
print(f"local_cache: {local_cache}")
# print(f"local_cache: {local_cache}")
## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
# ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
try:
await tpm_rpm_limiter.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=local_cache,
data={},
call_type="",
)
# try:
# await tpm_rpm_limiter.async_pre_call_hook(
# user_api_key_dict=user_api_key_dict,
# cache=local_cache,
# data={},
# call_type="",
# )
pytest.fail(f"Expected call to fail")
except Exception as e:
assert e.status_code == 429 # type: ignore
# pytest.fail(f"Expected call to fail")
# except Exception as e:
# assert e.status_code == 429 # type: ignore
@pytest.mark.asyncio
async def test_namespace():
"""
- test if default namespace set via `proxyconfig._init_cache`
- respected for tpm/rpm caching
"""
from litellm.proxy.proxy_server import ProxyConfig
# @pytest.mark.asyncio
# async def test_namespace():
# """
# - test if default namespace set via `proxyconfig._init_cache`
# - respected for tpm/rpm caching
# """
# from litellm.proxy.proxy_server import ProxyConfig
redis_usage_cache: Optional[RedisCache] = None
cache_params = {"type": "redis", "namespace": "litellm_default"}
# redis_usage_cache: Optional[RedisCache] = None
# cache_params = {"type": "redis", "namespace": "litellm_default"}
## INIT CACHE ##
proxy_config = ProxyConfig()
setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)
# ## INIT CACHE ##
# proxy_config = ProxyConfig()
# setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)
proxy_config._init_cache(cache_params=cache_params)
# proxy_config._init_cache(cache_params=cache_params)
redis_cache: Optional[RedisCache] = getattr(
litellm.proxy.proxy_server, "redis_usage_cache"
)
# redis_cache: Optional[RedisCache] = getattr(
# litellm.proxy.proxy_server, "redis_usage_cache"
# )
## CHECK IF NAMESPACE SET ##
assert redis_cache.namespace == "litellm_default"
# ## CHECK IF NAMESPACE SET ##
# assert redis_cache.namespace == "litellm_default"
## CHECK IF TPM/RPM RATE LIMITING WORKS ##
await test_pre_call_hook_team_rpm_limits(_redis_usage_cache=redis_cache)
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
precise_minute = f"{current_date}-{current_hour}-{current_minute}"
# ## CHECK IF TPM/RPM RATE LIMITING WORKS ##
# await test_pre_call_hook_team_rpm_limits(_redis_usage_cache=redis_cache)
# current_date = datetime.now().strftime("%Y-%m-%d")
# current_hour = datetime.now().strftime("%H")
# current_minute = datetime.now().strftime("%M")
# precise_minute = f"{current_date}-{current_hour}-{current_minute}"
cache_key = "litellm_default:usage:{}".format(precise_minute)
value = await redis_cache.async_get_cache(key=cache_key)
assert value is not None
# cache_key = "litellm_default:usage:{}".format(precise_minute)
# value = await redis_cache.async_get_cache(key=cache_key)
# assert value is not None


@ -229,17 +229,21 @@ async def test_pre_call_hook_user_tpm_limits():
"""
Test if error raised on hitting tpm limits
"""
local_cache = DualCache()
# create user with tpm/rpm limits
user_id = "test-user"
user_obj = {"tpm_limit": 9, "rpm_limit": 10}
local_cache.set_cache(key=user_id, value=user_obj)
_api_key = "sk-12345"
user_api_key_dict = UserAPIKeyAuth(
api_key=_api_key,
user_id="ishaan",
user_id_rate_limits={"tpm_limit": 9, "rpm_limit": 10},
user_id=user_id,
)
res = dict(user_api_key_dict)
print("dict user", res)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler()
await parallel_request_handler.async_pre_call_hook(
@ -248,7 +252,7 @@ async def test_pre_call_hook_user_tpm_limits():
kwargs = {
"litellm_params": {
"metadata": {"user_api_key_user_id": "ishaan", "user_api_key": "gm"}
"metadata": {"user_api_key_user_id": user_id, "user_api_key": "gm"}
}
}
@ -734,7 +738,7 @@ async def test_bad_router_call():
request_count_api_key = f"{_api_key}::{precise_minute}::request_count"
assert (
parallel_request_handler.user_api_key_cache.get_cache(
parallel_request_handler.user_api_key_cache.get_cache( # type: ignore
key=request_count_api_key
)["current_requests"]
== 1
@ -751,7 +755,7 @@ async def test_bad_router_call():
except:
pass
assert (
parallel_request_handler.user_api_key_cache.get_cache(
parallel_request_handler.user_api_key_cache.get_cache( # type: ignore
key=request_count_api_key
)["current_requests"]
== 0


@ -1,9 +1,15 @@
"""
litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
"""
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import uuid
import enum
import httpx
from pydantic import BaseModel, validator, Field
from pydantic import BaseModel, Field
import datetime
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
import uuid, enum
class ModelConfig(BaseModel):
@ -76,6 +82,12 @@ class ModelInfo(BaseModel):
db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config.
)
updated_at: Optional[datetime.datetime] = None
updated_by: Optional[str] = None
created_at: Optional[datetime.datetime] = None
created_by: Optional[str] = None
base_model: Optional[str] = (
None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
)
@ -426,3 +438,4 @@ class ModelGroupInfo(BaseModel):
supports_parallel_function_calling: bool = Field(default=False)
supports_vision: bool = Field(default=False)
supports_function_calling: bool = Field(default=False)
supported_openai_params: List[str] = Field(default=[])


@ -15,10 +15,17 @@ class ProviderField(TypedDict):
class ModelInfo(TypedDict):
max_tokens: int
max_input_tokens: int
max_output_tokens: int
"""
Model info for a given model; this is the information found in litellm.model_prices_and_context_window.json
"""
max_tokens: Optional[int]
max_input_tokens: Optional[int]
max_output_tokens: Optional[int]
input_cost_per_token: float
output_cost_per_token: float
litellm_provider: str
mode: str
mode: Literal[
"completion", "embedding", "image_generation", "chat", "audio_transcription"
]
supported_openai_params: Optional[List[str]]
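A short, hedged sketch of consuming this shape via `litellm.get_model_info` (model name is illustrative; the optional fields may be `None` for models without full metadata):

```python
# Sketch: get_model_info returns a dict matching the ModelInfo TypedDict above.
import litellm

info = litellm.get_model_info(model="gpt-3.5-turbo")  # illustrative model name
print(info["mode"])                     # e.g. "chat"
print(info["max_input_tokens"])         # Optional[int]; may be None for some models
print(info["supported_openai_params"])  # new field populated by this change
```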


@ -5797,30 +5797,11 @@ def get_optional_params(
model=model, custom_llm_provider=custom_llm_provider
)
_check_valid_arg(supported_params=supported_params)
if temperature is not None:
if (
temperature == 0 and model == "mistralai/Mistral-7B-Instruct-v0.1"
): # this model does no support temperature == 0
temperature = 0.0001 # close to 0
optional_params["temperature"] = temperature
if top_p:
optional_params["top_p"] = top_p
if n:
optional_params["n"] = n
if stream:
optional_params["stream"] = stream
if stop:
optional_params["stop"] = stop
if max_tokens:
optional_params["max_tokens"] = max_tokens
if presence_penalty:
optional_params["presence_penalty"] = presence_penalty
if frequency_penalty:
optional_params["frequency_penalty"] = frequency_penalty
if logit_bias:
optional_params["logit_bias"] = logit_bias
if user:
optional_params["user"] = user
optional_params = litellm.DeepInfraConfig().map_openai_params(
non_default_params=non_default_params,
optional_params=optional_params,
model=model,
)
elif custom_llm_provider == "perplexity":
supported_params = get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
@ -6604,19 +6585,7 @@ def get_supported_openai_params(
elif custom_llm_provider == "petals":
return ["max_tokens", "temperature", "top_p", "stream"]
elif custom_llm_provider == "deepinfra":
return [
"temperature",
"top_p",
"n",
"stream",
"stop",
"max_tokens",
"presence_penalty",
"frequency_penalty",
"logit_bias",
"user",
"response_format",
]
return litellm.DeepInfraConfig().get_supported_openai_params()
elif custom_llm_provider == "perplexity":
return [
"temperature",
@ -7107,6 +7076,7 @@ def get_model_info(model: str) -> ModelInfo:
- output_cost_per_token (float): The cost per token for output.
- litellm_provider (str): The provider of the model (e.g., "openai").
- mode (str): The mode of the model (e.g., "chat" or "completion").
- supported_openai_params (List[str]): A list of supported OpenAI parameters for the model.
Raises:
Exception: If the model is not mapped yet.
@ -7118,9 +7088,11 @@ def get_model_info(model: str) -> ModelInfo:
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "openai",
"mode": "chat"
"mode": "chat",
"supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
}
"""
supported_openai_params: Union[List[str], None] = []
def _get_max_position_embeddings(model_name):
# Construct the URL for the config.json file
@ -7148,9 +7120,18 @@ def get_model_info(model: str) -> ModelInfo:
azure_llms = litellm.azure_llms
if model in azure_llms:
model = azure_llms[model]
if model in litellm.model_cost:
return litellm.model_cost[model]
model, custom_llm_provider, _, _ = get_llm_provider(model=model)
##########################
# Get custom_llm_provider
split_model, custom_llm_provider = model, ""
try:
split_model, custom_llm_provider, _, _ = get_llm_provider(model=model)
except:
pass
#########################
supported_openai_params = litellm.get_supported_openai_params(
model=model, custom_llm_provider=custom_llm_provider
)
if custom_llm_provider == "huggingface":
max_tokens = _get_max_position_embeddings(model_name=model)
return {
@ -7159,15 +7140,26 @@ def get_model_info(model: str) -> ModelInfo:
"output_cost_per_token": 0,
"litellm_provider": "huggingface",
"mode": "chat",
"supported_openai_params": supported_openai_params,
}
else:
"""
Check if model in model cost map
Check if:
1. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost
2. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost
"""
if model in litellm.model_cost:
return litellm.model_cost[model]
_model_info = litellm.model_cost[model]
_model_info["supported_openai_params"] = supported_openai_params
return _model_info
if split_model in litellm.model_cost:
_model_info = litellm.model_cost[split_model]
_model_info["supported_openai_params"] = supported_openai_params
return _model_info
else:
raise Exception()
raise ValueError(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
except:
raise Exception(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"


@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.38.10"
version = "1.38.11"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.38.10"
version = "1.38.11"
version_files = [
"pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-e266cb0126026d40.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"dYIEEO-62OCgyckEhgBd-\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-e85084d25f9ae5e4.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[39712,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"608\",\"static/chunks/608-d128caa3cfe973c1.js\",\"931\",\"static/chunks/app/page-76d278f96a0e9768.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/5d93d4a9fa59d72f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"D_ZUmMtLMPSa4aQQUJtKt\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-e266cb0126026d40.js"],""]
3:I[39712,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","608","static/chunks/608-d128caa3cfe973c1.js","931","static/chunks/app/page-76d278f96a0e9768.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["dYIEEO-62OCgyckEhgBd-",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["D_ZUmMtLMPSa4aQQUJtKt",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/5d93d4a9fa59d72f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -86,6 +86,8 @@ import type { UploadProps } from "antd";
import { Upload } from "antd";
import TimeToFirstToken from "./model_metrics/time_to_first_token";
import DynamicFields from "./model_add/dynamic_form";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
interface ModelDashboardProps {
accessToken: string | null;
token: string | null;
@ -269,6 +271,8 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
const [selectedProvider, setSelectedProvider] = useState<String>("OpenAI");
const [healthCheckResponse, setHealthCheckResponse] = useState<string>("");
const [editModalVisible, setEditModalVisible] = useState<boolean>(false);
const [infoModalVisible, setInfoModalVisible] = useState<boolean>(false);
const [selectedModel, setSelectedModel] = useState<any>(null);
const [availableModelGroups, setAvailableModelGroups] = useState<
Array<string>
@ -297,6 +301,15 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
useState<RetryPolicyObject | null>(null);
const [defaultRetry, setDefaultRetry] = useState<number>(0);
function formatCreatedAt(createdAt: string | null) {
if (createdAt) {
const date = new Date(createdAt);
      // Format as e.g. "May 27, 2024"
      const options: Intl.DateTimeFormatOptions = { month: 'long', day: 'numeric', year: 'numeric' };
      return date.toLocaleDateString('en-US', options);
}
return null;
}
const EditModelModal: React.FC<EditModelModalProps> = ({
visible,
onCancel,
@ -423,11 +436,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
setEditModalVisible(true);
};
const handleInfoClick = (model: any) => {
setSelectedModel(model);
setInfoModalVisible(true);
};
const handleEditCancel = () => {
setEditModalVisible(false);
setSelectedModel(null);
};
const handleInfoCancel = () => {
setInfoModalVisible(false);
setSelectedModel(null);
};
const handleEditSubmit = async (formValues: Record<string, any>) => {
// Call API to update team with teamId and values
@ -1039,7 +1062,6 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
</div>
<Card>
<Table
className="mt-5"
style={{ maxWidth: "1500px", width: "100%" }}
>
<TableHead>
@ -1049,6 +1071,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "150px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Public Model Name
@ -1058,6 +1081,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "100px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Provider
@ -1068,25 +1092,18 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "150px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
API Base
</TableHeaderCell>
)}
<TableHeaderCell
style={{
maxWidth: "200px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
Extra litellm Params
</TableHeaderCell>
<TableHeaderCell
style={{
maxWidth: "85px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Input Price{" "}
@ -1099,6 +1116,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
maxWidth: "85px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Output Price{" "}
@ -1106,24 +1124,45 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
/1M Tokens ($)
</p>
</TableHeaderCell>
<TableHeaderCell
style={{
maxWidth: "120px",
maxWidth: "100px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Max Tokens
{
premiumUser ? "Created At" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> Created At</a>
}
</TableHeaderCell>
<TableHeaderCell
style={{
maxWidth: "100px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
{
premiumUser ? "Created By" : <a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank" style={{color: "#72bcd4" }}> Created By</a>
}
</TableHeaderCell>
<TableHeaderCell
style={{
maxWidth: "50px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "11px"
}}
>
Status
</TableHeaderCell>
<TableHeaderCell>
</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
@ -1137,15 +1176,17 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
selectedModelGroup === ""
)
.map((model: any, index: number) => (
<TableRow key={index}>
<TableRow key={index} style={{ maxHeight: "1px", minHeight: "1px" }}>
<TableCell
style={{
maxWidth: "150px",
maxWidth: "100px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
<Text>{model.model_name}</Text>
<p style={{ fontSize: "10px" }}>
{model.model_name || "-"}
</p>
</TableCell>
<TableCell
style={{
@ -1154,7 +1195,9 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word",
}}
>
{model.provider}
<p style={{ fontSize: "10px" }}>
{model.provider || "-"}
</p>
</TableCell>
{userRole === "Admin" && (
<TableCell
@ -1164,31 +1207,22 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word",
}}
>
{model.api_base}
</TableCell>
)}
<TableCell
<Tooltip title={model && model.api_base}>
<pre
style={{
maxWidth: "200px",
maxWidth: "150px",
whiteSpace: "normal",
wordBreak: "break-word",
fontSize: "10px",
}}
title={model && model.api_base ? model.api_base : ""}
>
<Accordion>
<AccordionHeader>
<Text>Litellm params</Text>
</AccordionHeader>
<AccordionBody>
<pre>
{JSON.stringify(
model.cleanedLitellmParams,
null,
2
)}
{model && model.api_base ? model.api_base.slice(0, 20) : "-"}
</pre>
</AccordionBody>
</Accordion>
</Tooltip>
</TableCell>
)}
<TableCell
style={{
maxWidth: "80px",
@ -1196,6 +1230,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word",
}}
>
<pre style={{ fontSize: "10px" }}>
{model.input_cost
? model.input_cost
: model.litellm_params.input_cost_per_token
@ -1205,6 +1240,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
) * 1000000
).toFixed(2)
: null}
</pre>
</TableCell>
<TableCell
style={{
@ -1213,6 +1249,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
wordBreak: "break-word",
}}
>
<pre style={{ fontSize: "10px" }}>
{model.output_cost
? model.output_cost
: model.litellm_params.output_cost_per_token
@ -1222,17 +1259,21 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
) * 1000000
).toFixed(2)
: null}
</pre>
</TableCell>
<TableCell
style={{
maxWidth: "120px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
<TableCell>
<p style={{ fontSize: "10px" }}>
Max Tokens: {model.max_tokens} <br></br>
Max Input Tokens: {model.max_input_tokens}
{
premiumUser ? formatCreatedAt(model.model_info.created_at) || "-" : "-"
}
</p>
</TableCell>
<TableCell>
<p style={{ fontSize: "10px" }}>
{
premiumUser ? model.model_info.created_by || "-" : "-"
}
</p>
</TableCell>
<TableCell
@ -1248,7 +1289,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
size="xs"
className="text-white"
>
<p style={{ fontSize: "10px" }}>DB Model</p>
<p style={{ fontSize: "8px" }}>DB Model</p>
</Badge>
) : (
<Badge
@ -1256,26 +1297,42 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
size="xs"
className="text-black"
>
<p style={{ fontSize: "10px" }}>Config Model</p>
<p style={{ fontSize: "8px" }}>Config Model</p>
</Badge>
)}
</TableCell>
<TableCell
style={{
maxWidth: "100px",
maxWidth: "150px",
whiteSpace: "normal",
wordBreak: "break-word",
}}
>
<Grid numItems={3}>
<Col>
<Icon
icon={InformationCircleIcon}
size="sm"
onClick={() => handleInfoClick(model)}
/>
</Col>
<Col>
<Icon
icon={PencilAltIcon}
size="sm"
onClick={() => handleEditClick(model)}
/>
</Col>
<Col>
<DeleteModelButton
modelID={model.model_info.id}
accessToken={accessToken}
/>
</Col>
</Grid>
</TableCell>
</TableRow>
))}
@ -1289,6 +1346,20 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
model={selectedModel}
onSubmit={handleEditSubmit}
/>
<Modal
title={selectedModel && selectedModel.model_name}
visible={infoModalVisible}
width={800}
footer={null}
onCancel={handleInfoCancel}
>
<Title>Model Info</Title>
<SyntaxHighlighter language="json" >
{selectedModel && JSON.stringify(selectedModel, null, 2)}
</SyntaxHighlighter>
</Modal>
</TabPanel>
<TabPanel className="h-full">
<Title2 level={2}>Add new model</Title2>
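
For reference, a minimal standalone sketch of the JSON-viewer pattern the new "Model Info" modal relies on: react-syntax-highlighter's Prism build rendering a pretty-printed object. The `JsonViewer` name and props below are illustrative, not part of this diff.

```tsx
import React from "react";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";

// Renders any serializable value as a highlighted JSON block, the same way the
// info modal above renders the selected model.
export function JsonViewer({ data }: { data: unknown }) {
  return (
    <SyntaxHighlighter language="json">
      {JSON.stringify(data, null, 2)}
    </SyntaxHighlighter>
  );
}
```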

View file

@ -1,20 +1,26 @@
import React, { useEffect, useState } from 'react';
import React, { useEffect, useState } from "react";
import { modelHubCall } from "./networking";
import { Card, Text, Title, Grid, Button, Badge, Tab,
import {
Card,
Text,
Title,
Grid,
Button,
Badge,
Tab,
TabGroup,
TabList,
TabPanel,
TabPanels, } from "@tremor/react";
TabPanels,
} from "@tremor/react";
import { RightOutlined, CopyOutlined } from '@ant-design/icons';
import { RightOutlined, CopyOutlined } from "@ant-design/icons";
import { Modal, Tooltip } from 'antd';
import { Modal, Tooltip } from "antd";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
interface ModelHubProps {
userID: string | null;
userRole: string | null;
@ -22,7 +28,6 @@ interface ModelHubProps {
accessToken: string | null;
keys: any; // Replace with the appropriate type for 'keys' prop
premiumUser: boolean;
}
interface ModelInfo {
@ -32,15 +37,13 @@ interface ModelInfo {
supports_vision: boolean;
max_input_tokens?: number;
max_output_tokens?: number;
supported_openai_params?: string[];
// Add other properties if needed
}
const ModelHub: React.FC<ModelHubProps> = ({
userID,
userRole,
@ -52,140 +55,80 @@ const ModelHub: React.FC<ModelHubProps> = ({
keys,
premiumUser,
}) => {
const [modelHubData, setModelHubData] = useState<ModelInfo[] | null>(null);
const [isModalVisible, setIsModalVisible] = useState(false);
const [selectedModel, setSelectedModel] = useState<null | ModelInfo>(null);
useEffect(() => {
if (!accessToken || !token || !userRole || !userID) {
return;
}
const fetchData = async () => {
try {
const _modelHubData = await modelHubCall(accessToken, userID, userRole);
console.log("ModelHubData:", _modelHubData);
setModelHubData(_modelHubData.data);
} catch (error) {
console.error("There was an error fetching the model data", error);
}
};
fetchData();
}, [accessToken, token, userRole, userID]);
const showModal = (model: ModelInfo) => {
setSelectedModel(model);
setIsModalVisible(true);
};
const handleOk = () => {
setIsModalVisible(false);
setSelectedModel(null);
};
const handleCancel = () => {
setIsModalVisible(false);
setSelectedModel(null);
};
const copyToClipboard = (text: string) => {
navigator.clipboard.writeText(text);
};
return (
<div>
<div className="w-full m-2 mt-2 p-8">
<div className="relative w-full"></div>
<div className="relative w-full">
</div>
<div className='flex items-center'>
<Title className='ml-8 text-center '>Model Hub</Title>
<Button className='ml-4'>
<div className="flex items-center">
<Title className="ml-8 text-center ">Model Hub</Title>
<Button className="ml-4">
<a href="https://forms.gle/W3U4PZpJGFHWtHyA9" target="_blank">
Share
Make Public
</a>
</Button>
</div>
<div className="grid grid-cols-2 gap-6 sm:grid-cols-3 lg:grid-cols-4">
{modelHubData && modelHubData.map((model: ModelInfo) => (
<Card
key={model.model_group}
className="mt-5 mx-8"
>
<pre className='flex justify-between'>
{modelHubData &&
modelHubData.map((model: ModelInfo) => (
<Card key={model.model_group} className="mt-5 mx-8">
<pre className="flex justify-between">
<Title>{model.model_group}</Title>
<Tooltip title={model.model_group}>
<CopyOutlined onClick={() => copyToClipboard(model.model_group)} style={{ cursor: 'pointer', marginRight: '10px' }} />
<CopyOutlined
onClick={() => copyToClipboard(model.model_group)}
style={{ cursor: "pointer", marginRight: "10px" }}
/>
</Tooltip>
</pre>
<div className='my-5'>
<Text>Mode: {model.mode}</Text>
@ -193,52 +136,37 @@ const ModelHub: React.FC<ModelHubProps> = ({
<Text>Supports Vision: {model?.supports_vision == true ? "Yes" : "No"}</Text>
<Text>Max Input Tokens: {model?.max_input_tokens ? model?.max_input_tokens : "N/A"}</Text>
<Text>Max Output Tokens: {model?.max_output_tokens ? model?.max_output_tokens : "N/A"}</Text>
</div>
<div style={{ marginTop: 'auto', textAlign: 'right' }}>
<a href="#" onClick={() => showModal(model)} style={{ color: '#1890ff', fontSize: 'smaller' }}>
<div style={{ marginTop: "auto", textAlign: "right" }}>
<a
href="#"
onClick={() => showModal(model)}
style={{ color: "#1890ff", fontSize: "smaller" }}
>
View more <RightOutlined />
</a>
</div>
</Card>
))}
</div>
</div>
<Modal
title="Model Usage"
title={selectedModel && selectedModel.model_group ? selectedModel.model_group : "Unknown Model"}
width={800}
visible={isModalVisible}
footer={null}
onOk={handleOk}
onCancel={handleCancel}
>
{selectedModel && (
<div>
<p><strong>Model Name:</strong> {selectedModel.model_group}</p>
<p className='mb-4'><strong>Model Information & Usage</strong></p>
<TabGroup>
<TabList>
<Tab>OpenAI Python SDK</Tab>
<Tab>Supported OpenAI Params</Tab>
<Tab>LlamaIndex</Tab>
<Tab>Langchain Py</Tab>
</TabList>
@ -266,6 +194,11 @@ print(response)
`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`${selectedModel.supported_openai_params?.map((param) => `${param}\n`).join('')}`}
</SyntaxHighlighter>
</TabPanel>
<TabPanel>
<SyntaxHighlighter language="python">
{`
@ -340,19 +273,11 @@ print(response)
{/* <p><strong>Additional Params:</strong> {JSON.stringify(selectedModel.litellm_params)}</p> */}
{/* Add other model details here */}
</div>
)}
</Modal>
</div>
);
};
export default ModelHub;

View file

@ -1,6 +1,10 @@
"use client";
import React, { useState, useEffect } from "react";
import { userInfoCall, modelAvailableCall, getTotalSpendCall } from "./networking";
import {
userInfoCall,
modelAvailableCall,
getTotalSpendCall,
} from "./networking";
import { Grid, Col, Card, Text, Title } from "@tremor/react";
import CreateKey from "./create_key_button";
import ViewKeyTable from "./view_key_table";
@ -19,7 +23,6 @@ type UserSpendData = {
max_budget?: number | null;
};
interface UserDashboardProps {
userID: string | null;
userRole: string | null;
@ -35,8 +38,8 @@ interface UserDashboardProps {
type TeamInterface = {
models: any[];
team_id: null;
team_alias: String
}
team_alias: String;
};
const UserDashboard: React.FC<UserDashboardProps> = ({
userID,
@ -63,10 +66,10 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
const [teamSpend, setTeamSpend] = useState<number | null>(null);
const [userModels, setUserModels] = useState<string[]>([]);
const defaultTeam: TeamInterface = {
"models": [],
"team_alias": "Default Team",
"team_id": null
}
models: [],
team_alias: "Default Team",
team_id: null,
};
const [selectedTeam, setSelectedTeam] = useState<any | null>(
teams ? teams[0] : defaultTeam
);
@ -137,7 +140,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
} else {
const fetchData = async () => {
try {
const response = await userInfoCall(accessToken, userID, userRole, false, null, null);
const response = await userInfoCall(
accessToken,
userID,
userRole,
false,
null,
null
);
console.log(
`received teams in user dashboard: ${Object.keys(
response
@ -152,7 +162,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
}
setKeys(response["keys"]); // Assuming this is the correct path to your data
setTeams(response["teams"]);
const teamsArray = [...response['teams']];
const teamsArray = [...response["teams"]];
if (teamsArray.length > 0) {
console.log(`response['teams']: ${teamsArray}`);
setSelectedTeam(teamsArray[0]);
@ -194,22 +204,30 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
fetchData();
}
}
}, [userID, token, accessToken, keys, userRole]);
useEffect(() => {
// This code will run every time selectedTeam changes
if (keys !== null && selectedTeam !== null && selectedTeam !== undefined) {
if (
keys !== null &&
selectedTeam !== null &&
selectedTeam !== undefined &&
selectedTeam.team_id !== null
) {
let sum = 0;
for (const key of keys) {
if (selectedTeam.hasOwnProperty('team_id') && key.team_id !== null && key.team_id === selectedTeam.team_id) {
if (
selectedTeam.hasOwnProperty("team_id") &&
key.team_id !== null &&
key.team_id === selectedTeam.team_id
) {
sum += key.spend;
}
}
setTeamSpend(sum);
} else if (keys !== null) {
// no real team selected (default team): sum spend across all keys
let sum = 0
let sum = 0;
for (const key of keys) {
sum += key.spend;
}
@ -245,7 +263,6 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
}
console.log("inside user dashboard, selected team", selectedTeam);
console.log(`teamSpend: ${teamSpend}`)
return (
<div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 h-[75vh] w-full mt-2">
@ -262,7 +279,6 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
accessToken={accessToken}
userSpend={teamSpend}
selectedTeam={selectedTeam ? selectedTeam : null}
/>
<ViewKeyTable
@ -283,7 +299,11 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
data={keys}
setData={setKeys}
/>
<DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole}/>
<DashboardTeam
teams={teams}
setSelectedTeam={setSelectedTeam}
userRole={userRole}
/>
</Col>
</Grid>
</div>
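
For reference, a minimal standalone sketch (not part of the diff) of the team-spend calculation the useEffect above performs, assuming keys carry `team_id` and `spend` fields as in the component:

```tsx
type KeyInfo = { team_id: string | null; spend: number };

// Mirrors the effect above: when a real team is selected, only keys belonging to
// that team are summed; otherwise (default team) every key's spend is summed.
function computeTeamSpend(keys: KeyInfo[], selectedTeamId: string | null): number {
  if (selectedTeamId !== null) {
    return keys
      .filter((k) => k.team_id === selectedTeamId)
      .reduce((sum, k) => sum + k.spend, 0);
  }
  return keys.reduce((sum, k) => sum + k.spend, 0);
}

// Example: two keys on team "t1" and one default-team key.
const exampleKeys: KeyInfo[] = [
  { team_id: "t1", spend: 1.5 },
  { team_id: "t1", spend: 0.5 },
  { team_id: null, spend: 2.0 },
];
console.log(computeTeamSpend(exampleKeys, "t1")); // 2
console.log(computeTeamSpend(exampleKeys, null)); // 4 (all keys)
```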