Merge branch 'main' into litellm_end_user_cost_tracking

Krish Dholakia 2024-05-15 22:27:04 -07:00 committed by GitHub
commit 07a1cf39e8
44 changed files with 1983 additions and 681 deletions


@ -102,12 +102,18 @@ Ollama supported models: https://github.com/ollama/ollama
| Model Name | Function Call |
|----------------------|-----------------------------------------------------------------------------------|
| Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
| Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
| Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
| Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
| Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
| Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
| Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
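For quick orientation, the calls in this table map directly onto litellm.completion. A minimal sketch of the non-streaming Llama3 row, assuming a local Ollama server is listening on the default port (the prompt is a placeholder):

import litellm

# Messages use the standard OpenAI chat format.
messages = [{"role": "user", "content": "Say hello in one sentence."}]

# Non-streaming call against a locally running Ollama server.
response = litellm.completion(
    model="ollama/llama3",
    messages=messages,
    api_base="http://localhost:11434",
    stream=False,
)
print(response.choices[0].message.content)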


@ -15,11 +15,19 @@ from typing import Optional
class AuthenticationError(openai.AuthenticationError): # type: ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
def __init__(
self,
message,
llm_provider,
model,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 401
self.message = message
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
@ -27,11 +35,19 @@ class AuthenticationError(openai.AuthenticationError): # type: ignore
# raise when invalid models passed, example gpt-8
class NotFoundError(openai.NotFoundError): # type: ignore
def __init__(self, message, model, llm_provider, response: httpx.Response):
def __init__(
self,
message,
model,
llm_provider,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 404
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
@ -39,12 +55,18 @@ class NotFoundError(openai.NotFoundError): # type: ignore
class BadRequestError(openai.BadRequestError): # type: ignore
def __init__(
self, message, model, llm_provider, response: Optional[httpx.Response] = None
self,
message,
model,
llm_provider,
response: Optional[httpx.Response] = None,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
response = response or httpx.Response(
status_code=self.status_code,
request=httpx.Request(
@ -57,18 +79,28 @@ class BadRequestError(openai.BadRequestError): # type: ignore
class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore
def __init__(self, message, model, llm_provider, response: httpx.Response):
def __init__(
self,
message,
model,
llm_provider,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 422
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
class Timeout(openai.APITimeoutError): # type: ignore
def __init__(self, message, model, llm_provider):
def __init__(
self, message, model, llm_provider, litellm_debug_info: Optional[str] = None
):
request = httpx.Request(method="POST", url="https://api.openai.com/v1")
super().__init__(
request=request
@ -77,6 +109,7 @@ class Timeout(openai.APITimeoutError): # type: ignore
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
# custom function to convert to str
def __str__(self):
@ -84,22 +117,38 @@ class Timeout(openai.APITimeoutError): # type: ignore
class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
def __init__(
self,
message,
llm_provider,
model,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 403
self.message = message
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
class RateLimitError(openai.RateLimitError): # type: ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
def __init__(
self,
message,
llm_provider,
model,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 429
self.message = message
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
@ -107,11 +156,19 @@ class RateLimitError(openai.RateLimitError): # type: ignore
# sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors
class ContextWindowExceededError(BadRequestError): # type: ignore
def __init__(self, message, model, llm_provider, response: httpx.Response):
def __init__(
self,
message,
model,
llm_provider,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
super().__init__(
message=self.message,
model=self.model, # type: ignore
@ -122,11 +179,19 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
class ContentPolicyViolationError(BadRequestError): # type: ignore
# Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
def __init__(self, message, model, llm_provider, response: httpx.Response):
def __init__(
self,
message,
model,
llm_provider,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 400
self.message = message
self.model = model
self.llm_provider = llm_provider
self.litellm_debug_info = litellm_debug_info
super().__init__(
message=self.message,
model=self.model, # type: ignore
@ -136,11 +201,19 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore
class ServiceUnavailableError(openai.APIStatusError): # type: ignore
def __init__(self, message, llm_provider, model, response: httpx.Response):
def __init__(
self,
message,
llm_provider,
model,
response: httpx.Response,
litellm_debug_info: Optional[str] = None,
):
self.status_code = 503
self.message = message
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
super().__init__(
self.message, response=response, body=None
) # Call the base class constructor with the parameters it needs
@ -149,33 +222,51 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore
# raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
class APIError(openai.APIError): # type: ignore
def __init__(
self, status_code, message, llm_provider, model, request: httpx.Request
self,
status_code,
message,
llm_provider,
model,
request: httpx.Request,
litellm_debug_info: Optional[str] = None,
):
self.status_code = status_code
self.message = message
self.llm_provider = llm_provider
self.model = model
self.litellm_debug_info = litellm_debug_info
super().__init__(self.message, request=request, body=None) # type: ignore
# raised if an invalid request (not get, delete, put, post) is made
class APIConnectionError(openai.APIConnectionError): # type: ignore
def __init__(self, message, llm_provider, model, request: httpx.Request):
def __init__(
self,
message,
llm_provider,
model,
request: httpx.Request,
litellm_debug_info: Optional[str] = None,
):
self.message = message
self.llm_provider = llm_provider
self.model = model
self.status_code = 500
self.litellm_debug_info = litellm_debug_info
super().__init__(message=self.message, request=request)
# raised if an invalid request (not get, delete, put, post) is made
class APIResponseValidationError(openai.APIResponseValidationError): # type: ignore
def __init__(self, message, llm_provider, model):
def __init__(
self, message, llm_provider, model, litellm_debug_info: Optional[str] = None
):
self.message = message
self.llm_provider = llm_provider
self.model = model
request = httpx.Request(method="POST", url="https://api.openai.com/v1")
response = httpx.Response(status_code=500, request=request)
self.litellm_debug_info = litellm_debug_info
super().__init__(response=response, body=None, message=message)
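Every mapped exception in this file now accepts and stores an optional litellm_debug_info string. A hedged sketch of constructing one of the updated classes directly, mirroring the signatures above (the debug string and request details are placeholders):

import httpx
from litellm.exceptions import RateLimitError

# Build the httpx response the constructor expects.
request = httpx.Request("POST", "https://api.openai.com/v1")
response = httpx.Response(status_code=429, request=request)

err = RateLimitError(
    message="rate limited",
    llm_provider="openai",
    model="gpt-3.5-turbo",
    response=response,
    litellm_debug_info="request_id=abc123 (placeholder)",
)
# Downstream handlers can read the extra context off the exception.
print(err.status_code, err.litellm_debug_info)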


@ -1744,6 +1744,30 @@
"litellm_provider": "openrouter",
"mode": "chat"
},
"openrouter/openai/gpt-4o": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"openrouter/openai/gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"openrouter/openai/gpt-4-vision-preview": {
"max_tokens": 130000,
"input_cost_per_token": 0.00001,
@ -2943,6 +2967,24 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/llama3": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/llama3:70b": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@ -2952,6 +2994,42 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/mistral-7B-Instruct-v0.1": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral-7B-Instruct-v0.2": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x22B-Instruct-v0.1": {
"max_tokens": 65536,
"max_input_tokens": 65536,
"max_output_tokens": 65536,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/codellama": {
"max_tokens": 4096,
"max_input_tokens": 4096,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[7926,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-6a39771cacf75ea6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"obp5wqVSVDMiDTC414cR8\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-c35c14c9afd091ec.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"2ASoJGxS-D4w-vat00xMy\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[7926,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-6a39771cacf75ea6.js"],""]
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-c35c14c9afd091ec.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["obp5wqVSVDMiDTC414cR8",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["2ASoJGxS-D4w-vat00xMy",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -18,11 +18,6 @@ model_list:
model: azure/chatgpt-v-2
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
input_cost_per_token: 0.0
output_cost_per_token: 0.0
- model_name: gpt-3.5-turbo
litellm_params:
model: gpt-4o
- model_name: bert-classifier
litellm_params:
model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
@ -34,14 +29,17 @@ router_settings:
enable_pre_call_checks: true
litellm_settings:
set_verbose: True
fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]
fallbacks: [{"gpt-3.5-turbo-012": ["azure-gpt-3.5-turbo"]}]
# service_callback: ["prometheus_system"]
# success_callback: ["prometheus"]
# failure_callback: ["prometheus"]
general_settings:
enable_jwt_auth: True
litellm_jwtauth:
team_id_default: "1234"
user_id_jwt_field:
user_id_upsert: True
disable_reset_budget: True
proxy_batch_write_at: 10 # 👈 Frequency of batch writing logs to server (in seconds)
routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
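The fallbacks entry now points at the azure-gpt-3.5-turbo deployment alias defined earlier in this file instead of a preview model name. A standalone sketch of the same wiring through the Python Router, using a keyless local Ollama deployment as a stand-in so the snippet runs without Azure credentials (the alias names come from the yaml; everything else is a placeholder):

from litellm import Router

model_list = [
    {
        "model_name": "azure-gpt-3.5-turbo",  # alias referenced by the fallback rule
        "litellm_params": {
            "model": "ollama/llama3",  # stand-in deployment; the real config targets Azure
            "api_base": "http://localhost:11434",
        },
    },
]

router = Router(
    model_list=model_list,
    enable_pre_call_checks=True,
    fallbacks=[{"gpt-3.5-turbo-012": ["azure-gpt-3.5-turbo"]}],
)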


@ -1,37 +1,11 @@
from pydantic import ConfigDict, BaseModel, Field, root_validator, Json, VERSION
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
from dataclasses import fields
import enum
from typing import Optional, List, Union, Dict, Literal, Any
from datetime import datetime
import uuid
import json
import uuid, json, sys, os
from litellm.types.router import UpdateRouterConfig
try:
from pydantic import model_validator # type: ignore
except ImportError:
from pydantic import root_validator # pydantic v1
def model_validator(mode): # type: ignore
pre = mode == "before"
return root_validator(pre=pre)
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore
return model_config
def hash_token(token: str):
import hashlib
@ -61,7 +35,8 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1
return self.__fields_set__
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
@ -104,11 +79,6 @@ class LiteLLMRoutes(enum.Enum):
"/v1/models",
]
# NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend
master_key_only_routes: List = [
"/global/spend/reset",
]
info_routes: List = [
"/key/info",
"/team/info",
@ -119,6 +89,11 @@ class LiteLLMRoutes(enum.Enum):
"/v2/key/info",
]
# NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend
master_key_only_routes: List = [
"/global/spend/reset",
]
sso_only_routes: List = [
"/key/generate",
"/key/update",
@ -227,13 +202,19 @@ class LiteLLM_JWTAuth(LiteLLMBase):
"global_spend_tracking_routes",
"info_routes",
]
team_jwt_scope: str = "litellm_team"
team_id_jwt_field: str = "client_id"
team_id_jwt_field: Optional[str] = None
team_allowed_routes: List[
Literal["openai_routes", "info_routes", "management_routes"]
] = ["openai_routes", "info_routes"]
team_id_default: Optional[str] = Field(
default=None,
description="If no team_id given, default permissions/spend-tracking to this team.s",
)
org_id_jwt_field: Optional[str] = None
user_id_jwt_field: Optional[str] = None
user_id_upsert: bool = Field(
default=False, description="If user doesn't exist, upsert them into the db."
)
end_user_id_jwt_field: Optional[str] = None
public_key_ttl: float = 600
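These LiteLLM_JWTAuth fields are what the litellm_jwtauth block in the proxy config above deserializes into: team_id_jwt_field becomes optional, a default team can be assigned, and unknown users can be upserted. A minimal sketch of constructing the model directly (field values are placeholders; the import path assumes the class stays in litellm/proxy/_types.py as in this diff):

from litellm.proxy._types import LiteLLM_JWTAuth

jwt_auth = LiteLLM_JWTAuth(
    team_id_default="1234",   # fall-back team when the JWT carries no team claim
    user_id_jwt_field="sub",  # claim to read the user id from
    user_id_upsert=True,      # create the user row on first sight
)
print(jwt_auth.team_id_default, jwt_auth.user_id_upsert)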
@ -259,7 +240,7 @@ class LiteLLMPromptInjectionParams(LiteLLMBase):
llm_api_system_prompt: Optional[str] = None
llm_api_fail_call_string: Optional[str] = None
@model_validator(mode="before")
@root_validator(pre=True)
def check_llm_api_params(cls, values):
llm_api_check = values.get("llm_api_check")
if llm_api_check is True:
@ -317,7 +298,8 @@ class ProxyChatCompletionRequest(LiteLLMBase):
deployment_id: Optional[str] = None
request_timeout: Optional[int] = None
model_config = get_model_config()
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
class ModelInfoDelete(LiteLLMBase):
@ -344,9 +326,11 @@ class ModelInfo(LiteLLMBase):
]
]
model_config = get_model_config()
class Config:
extra = Extra.allow # Allow extra fields
protected_namespaces = ()
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
if values.get("id") is None:
values.update({"id": str(uuid.uuid4())})
@ -372,9 +356,10 @@ class ModelParams(LiteLLMBase):
litellm_params: dict
model_info: ModelInfo
model_config = get_model_config()
class Config:
protected_namespaces = ()
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
if values.get("model_info") is None:
values.update({"model_info": ModelInfo()})
@ -410,7 +395,8 @@ class GenerateKeyRequest(GenerateRequestBase):
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
model_config = get_model_config()
class Config:
protected_namespaces = ()
class GenerateKeyResponse(GenerateKeyRequest):
@ -420,7 +406,7 @@ class GenerateKeyResponse(GenerateKeyRequest):
user_id: Optional[str] = None
token_id: Optional[str] = None
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
if values.get("token") is not None:
values.update({"key": values.get("token")})
@ -460,7 +446,8 @@ class LiteLLM_ModelTable(LiteLLMBase):
created_by: str
updated_by: str
model_config = get_model_config()
class Config:
protected_namespaces = ()
class NewUserRequest(GenerateKeyRequest):
@ -488,7 +475,7 @@ class UpdateUserRequest(GenerateRequestBase):
user_role: Optional[str] = None
max_budget: Optional[float] = None
@model_validator(mode="before")
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -508,7 +495,7 @@ class NewEndUserRequest(LiteLLMBase):
None # if no equivalent model in allowed region - default all requests to this model
)
@model_validator(mode="before")
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("max_budget") is not None and values.get("budget_id") is not None:
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
@ -521,7 +508,7 @@ class Member(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@model_validator(mode="before")
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -546,7 +533,8 @@ class TeamBase(LiteLLMBase):
class NewTeamRequest(TeamBase):
model_aliases: Optional[dict] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
class GlobalEndUsersSpend(LiteLLMBase):
@ -565,7 +553,7 @@ class TeamMemberDeleteRequest(LiteLLMBase):
user_id: Optional[str] = None
user_email: Optional[str] = None
@model_validator(mode="before")
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("user_id") is None and values.get("user_email") is None:
raise ValueError("Either user id or user email must be provided")
@ -599,9 +587,10 @@ class LiteLLM_TeamTable(TeamBase):
budget_reset_at: Optional[datetime] = None
model_id: Optional[int] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
dict_fields = [
"metadata",
@ -637,7 +626,8 @@ class LiteLLM_BudgetTable(LiteLLMBase):
model_max_budget: Optional[dict] = None
budget_duration: Optional[str] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
class NewOrganizationRequest(LiteLLM_BudgetTable):
@ -687,7 +677,8 @@ class KeyManagementSettings(LiteLLMBase):
class TeamDefaultSettings(LiteLLMBase):
team_id: str
model_config = get_model_config()
class Config:
extra = "allow" # allow params not defined here, these fall in litellm.completion(**kwargs)
class DynamoDBArgs(LiteLLMBase):
@ -711,6 +702,25 @@ class DynamoDBArgs(LiteLLMBase):
assume_role_aws_session_name: Optional[str] = None
class ConfigFieldUpdate(LiteLLMBase):
field_name: str
field_value: Any
config_type: Literal["general_settings"]
class ConfigFieldDelete(LiteLLMBase):
config_type: Literal["general_settings"]
field_name: str
class ConfigList(LiteLLMBase):
field_name: str
field_type: str
field_description: str
field_value: Any
stored_in_db: Optional[bool]
class ConfigGeneralSettings(LiteLLMBase):
"""
Documents all the fields supported by `general_settings` in config.yaml
@ -758,7 +768,11 @@ class ConfigGeneralSettings(LiteLLMBase):
description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth",
)
max_parallel_requests: Optional[int] = Field(
None, description="maximum parallel requests for each api key"
None,
description="maximum parallel requests for each api key",
)
global_max_parallel_requests: Optional[int] = Field(
None, description="global max parallel requests to allow for a proxy instance."
)
infer_model_from_keys: Optional[bool] = Field(
None,
@ -828,7 +842,8 @@ class ConfigYAML(LiteLLMBase):
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
)
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLM_VerificationToken(LiteLLMBase):
@ -862,7 +877,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
user_id_rate_limits: Optional[dict] = None
team_id_rate_limits: Optional[dict] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
@ -892,7 +908,7 @@ class UserAPIKeyAuth(
user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
allowed_model_region: Optional[Literal["eu"]] = None
@model_validator(mode="before")
@root_validator(pre=True)
def check_api_key(cls, values):
if values.get("api_key") is not None:
values.update({"token": hash_token(values.get("api_key"))})
@ -919,7 +935,7 @@ class LiteLLM_UserTable(LiteLLMBase):
tpm_limit: Optional[int] = None
rpm_limit: Optional[int] = None
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
@ -927,7 +943,8 @@ class LiteLLM_UserTable(LiteLLMBase):
values.update({"models": []})
return values
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLM_EndUserTable(LiteLLMBase):
@ -939,13 +956,14 @@ class LiteLLM_EndUserTable(LiteLLMBase):
default_model: Optional[str] = None
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
@model_validator(mode="before")
@root_validator(pre=True)
def set_model_info(cls, values):
if values.get("spend") is None:
values.update({"spend": 0.0})
return values
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLM_SpendLogs(LiteLLMBase):


@ -26,7 +26,7 @@ all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes
def common_checks(
request_body: dict,
team_object: LiteLLM_TeamTable,
team_object: Optional[LiteLLM_TeamTable],
user_object: Optional[LiteLLM_UserTable],
end_user_object: Optional[LiteLLM_EndUserTable],
global_proxy_spend: Optional[float],
@ -45,13 +45,14 @@ def common_checks(
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
"""
_model = request_body.get("model", None)
if team_object.blocked == True:
if team_object is not None and team_object.blocked == True:
raise Exception(
f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
)
# 2. If user can call model
if (
_model is not None
and team_object is not None
and len(team_object.models) > 0
and _model not in team_object.models
):
@ -65,7 +66,8 @@ def common_checks(
)
# 3. If team is in budget
if (
team_object.max_budget is not None
team_object is not None
and team_object.max_budget is not None
and team_object.spend is not None
and team_object.spend > team_object.max_budget
):
@ -239,6 +241,7 @@ async def get_user_object(
user_id: str,
prisma_client: Optional[PrismaClient],
user_api_key_cache: DualCache,
user_id_upsert: bool,
) -> Optional[LiteLLM_UserTable]:
"""
- Check if user id in proxy User Table
@ -252,7 +255,7 @@ async def get_user_object(
return None
# check if in cache
cached_user_obj = user_api_key_cache.async_get_cache(key=user_id)
cached_user_obj = await user_api_key_cache.async_get_cache(key=user_id)
if cached_user_obj is not None:
if isinstance(cached_user_obj, dict):
return LiteLLM_UserTable(**cached_user_obj)
@ -260,16 +263,27 @@ async def get_user_object(
return cached_user_obj
# else, check db
try:
response = await prisma_client.db.litellm_usertable.find_unique(
where={"user_id": user_id}
)
if response is None:
if user_id_upsert:
response = await prisma_client.db.litellm_usertable.create(
data={"user_id": user_id}
)
else:
raise Exception
return LiteLLM_UserTable(**response.dict())
except Exception as e: # if end-user not in db
raise Exception(
_response = LiteLLM_UserTable(**dict(response))
# save the user object to cache
await user_api_key_cache.async_set_cache(key=user_id, value=_response)
return _response
except Exception as e: # if user not in db
raise ValueError(
f"User doesn't exist in db. 'user_id'={user_id}. Create user via `/user/new` call."
)
@ -290,7 +304,7 @@ async def get_team_object(
)
# check if in cache
cached_team_obj = user_api_key_cache.async_get_cache(key=team_id)
cached_team_obj = await user_api_key_cache.async_get_cache(key=team_id)
if cached_team_obj is not None:
if isinstance(cached_team_obj, dict):
return LiteLLM_TeamTable(**cached_team_obj)
@ -305,7 +319,11 @@ async def get_team_object(
if response is None:
raise Exception
return LiteLLM_TeamTable(**response.dict())
_response = LiteLLM_TeamTable(**response.dict())
# save the team object to cache
await user_api_key_cache.async_set_cache(key=response.team_id, value=_response)
return _response
except Exception as e:
raise Exception(
f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."


@ -55,12 +55,9 @@ class JWTHandler:
return True
return False
def is_team(self, scopes: list) -> bool:
if self.litellm_jwtauth.team_jwt_scope in scopes:
return True
return False
def get_end_user_id(self, token: dict, default_value: Optional[str]) -> str:
def get_end_user_id(
self, token: dict, default_value: Optional[str]
) -> Optional[str]:
try:
if self.litellm_jwtauth.end_user_id_jwt_field is not None:
user_id = token[self.litellm_jwtauth.end_user_id_jwt_field]
@ -70,13 +67,36 @@ class JWTHandler:
user_id = default_value
return user_id
def is_required_team_id(self) -> bool:
"""
Returns:
- True: if 'team_id_jwt_field' is set
- False: if not
"""
if self.litellm_jwtauth.team_id_jwt_field is None:
return False
return True
def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
try:
if self.litellm_jwtauth.team_id_jwt_field is not None:
team_id = token[self.litellm_jwtauth.team_id_jwt_field]
elif self.litellm_jwtauth.team_id_default is not None:
team_id = self.litellm_jwtauth.team_id_default
else:
team_id = None
except KeyError:
team_id = default_value
return team_id
def is_upsert_user_id(self) -> bool:
"""
Returns:
- True: if 'user_id_upsert' is set
- False: if not
"""
return self.litellm_jwtauth.user_id_upsert
def get_user_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
try:
if self.litellm_jwtauth.user_id_jwt_field is not None:
@ -207,12 +227,14 @@ class JWTHandler:
raise Exception(f"Validation fails: {str(e)}")
elif public_key is not None and isinstance(public_key, str):
try:
cert = x509.load_pem_x509_certificate(public_key.encode(), default_backend())
cert = x509.load_pem_x509_certificate(
public_key.encode(), default_backend()
)
# Extract public key
key = cert.public_key().public_bytes(
serialization.Encoding.PEM,
serialization.PublicFormat.SubjectPublicKeyInfo
serialization.PublicFormat.SubjectPublicKeyInfo,
)
# decode the token using the public key
@ -221,7 +243,7 @@ class JWTHandler:
key,
algorithms=algorithms,
audience=audience,
options=decode_options
options=decode_options,
)
return payload
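With team_id_jwt_field now optional, get_team_id resolves the team in a fixed order: explicit JWT claim, then team_id_default, then None; is_required_team_id only forces an error when a claim field was actually configured. A condensed standalone mirror of that resolution order (not the real handler; it uses dict.get instead of the try/except in the class above):

from typing import Optional

def resolve_team_id(
    token: dict,
    team_id_jwt_field: Optional[str],
    team_id_default: Optional[str],
) -> Optional[str]:
    # explicit claim wins, then the configured default, otherwise no team
    if team_id_jwt_field is not None:
        return token.get(team_id_jwt_field)
    return team_id_default

assert resolve_team_id({"client_id": "team-a"}, "client_id", None) == "team-a"
assert resolve_team_id({}, None, "1234") == "1234"
assert resolve_team_id({}, None, None) is None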


@ -79,6 +79,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
max_parallel_requests = user_api_key_dict.max_parallel_requests
if max_parallel_requests is None:
max_parallel_requests = sys.maxsize
global_max_parallel_requests = data.get("metadata", {}).get(
"global_max_parallel_requests", None
)
tpm_limit = getattr(user_api_key_dict, "tpm_limit", sys.maxsize)
if tpm_limit is None:
tpm_limit = sys.maxsize
@ -91,6 +94,24 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# Setup values
# ------------
if global_max_parallel_requests is not None:
# get value from cache
_key = "global_max_parallel_requests"
current_global_requests = await cache.async_get_cache(
key=_key, local_only=True
)
# check if below limit
if current_global_requests is None:
current_global_requests = 1
# if above -> raise error
if current_global_requests >= global_max_parallel_requests:
raise HTTPException(
status_code=429, detail="Max parallel request limit reached."
)
# if below -> increment
else:
await cache.async_increment_cache(key=_key, value=1, local_only=True)
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
@ -207,6 +228,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
try:
self.print_verbose(f"INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
"global_max_parallel_requests", None
)
user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"]
user_api_key_user_id = kwargs["litellm_params"]["metadata"].get(
"user_api_key_user_id", None
@ -222,6 +246,14 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# Setup values
# ------------
if global_max_parallel_requests is not None:
# get value from cache
_key = "global_max_parallel_requests"
# decrement
await self.user_api_key_cache.async_increment_cache(
key=_key, value=-1, local_only=True
)
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
@ -336,6 +368,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
try:
self.print_verbose(f"Inside Max Parallel Request Failure Hook")
global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
"global_max_parallel_requests", None
)
user_api_key = (
kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
)
@ -347,17 +382,26 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
return
## decrement call count if call failed
if (
hasattr(kwargs["exception"], "status_code")
and kwargs["exception"].status_code == 429
and "Max parallel request limit reached" in str(kwargs["exception"])
):
if "Max parallel request limit reached" in str(kwargs["exception"]):
pass # ignore failed calls due to max limit being reached
else:
# ------------
# Setup values
# ------------
if global_max_parallel_requests is not None:
# get value from cache
_key = "global_max_parallel_requests"
current_global_requests = (
await self.user_api_key_cache.async_get_cache(
key=_key, local_only=True
)
)
# decrement
await self.user_api_key_cache.async_increment_cache(
key=_key, value=-1, local_only=True
)
current_date = datetime.now().strftime("%Y-%m-%d")
current_hour = datetime.now().strftime("%H")
current_minute = datetime.now().strftime("%M")
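The new global limit keeps one local-only counter under the cache key "global_max_parallel_requests": the pre-call hook rejects with a 429 once the counter reaches the configured limit and increments it otherwise, while the success and failure callbacks both decrement it via async_increment_cache(value=-1). A toy, synchronous mirror of that counter protocol (not the real hook, which runs against the DualCache):

class GlobalParallelLimiter:
    """Toy stand-in for the global counter logic above."""

    def __init__(self, limit: int):
        self.limit = limit
        self.current = 0  # stands in for the cached counter

    def pre_call(self) -> None:
        if self.current >= self.limit:
            raise RuntimeError("429: Max parallel request limit reached.")
        self.current += 1

    def post_call(self) -> None:
        # both the success and failure paths decrement
        self.current = max(0, self.current - 1)

limiter = GlobalParallelLimiter(limit=2)
limiter.pre_call()
limiter.pre_call()
try:
    limiter.pre_call()
except RuntimeError as exc:
    print(exc)
limiter.post_call()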


@ -234,6 +234,7 @@ class SpecialModelNames(enum.Enum):
class CommonProxyErrors(enum.Enum):
db_not_connected_error = "DB not connected"
no_llm_router = "No models configured on proxy"
not_allowed_access = "Admin-only endpoint. Not allowed to access this."
@app.exception_handler(ProxyException)
@ -440,10 +441,13 @@ async def user_api_key_auth(
# get team id
team_id = jwt_handler.get_team_id(token=valid_token, default_value=None)
if team_id is None:
if team_id is None and jwt_handler.is_required_team_id() == True:
raise Exception(
f"No team id passed in. Field checked in jwt token - '{jwt_handler.litellm_jwtauth.team_id_jwt_field}'"
)
team_object: Optional[LiteLLM_TeamTable] = None
if team_id is not None:
# check allowed team routes
is_allowed = allowed_routes_check(
user_role="team",
@ -481,11 +485,9 @@ async def user_api_key_auth(
user_id=user_id,
prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache,
user_id_upsert=jwt_handler.is_upsert_user_id(),
)
# save the user object to cache
await user_api_key_cache.async_set_cache(
key=user_id, value=user_object
)
# [OPTIONAL] track spend against an external user - `LiteLLM_EndUserTable`
end_user_object = None
end_user_id = jwt_handler.get_end_user_id(
@ -547,18 +549,18 @@ async def user_api_key_auth(
global_proxy_spend=global_proxy_spend,
route=route,
)
# save team object in cache
await user_api_key_cache.async_set_cache(
key=team_object.team_id, value=team_object
)
# return UserAPIKeyAuth object
return UserAPIKeyAuth(
api_key=None,
team_id=team_object.team_id,
team_tpm_limit=team_object.tpm_limit,
team_rpm_limit=team_object.rpm_limit,
team_models=team_object.models,
team_id=team_object.team_id if team_object is not None else None,
team_tpm_limit=(
team_object.tpm_limit if team_object is not None else None
),
team_rpm_limit=(
team_object.rpm_limit if team_object is not None else None
),
team_models=team_object.models if team_object is not None else [],
user_role="app_owner",
user_id=user_id,
org_id=org_id,
@ -566,9 +568,9 @@ async def user_api_key_auth(
#### ELSE ####
if master_key is None:
if isinstance(api_key, str):
return UserAPIKeyAuth(api_key=api_key)
return UserAPIKeyAuth(api_key=api_key, user_role="proxy_admin")
else:
return UserAPIKeyAuth()
return UserAPIKeyAuth(user_role="proxy_admin")
elif api_key is None: # only require api key if master key is set
raise Exception("No api key passed in.")
elif api_key == "":
@ -659,6 +661,7 @@ async def user_api_key_auth(
verbose_proxy_logger.debug("Token from db: %s", valid_token)
elif valid_token is not None:
verbose_proxy_logger.debug("API Key Cache Hit!")
user_id_information = None
if valid_token:
# Got Valid Token from Cache, DB
@ -1187,6 +1190,17 @@ async def user_api_key_auth(
# No token was found when looking up in the DB
raise Exception("Invalid token passed")
if valid_token_dict is not None:
if user_id_information is not None and _is_user_proxy_admin(
user_id_information
):
return UserAPIKeyAuth(
api_key=api_key, user_role="proxy_admin", **valid_token_dict
)
elif _has_user_setup_sso() and route in LiteLLMRoutes.sso_only_routes.value:
return UserAPIKeyAuth(
api_key=api_key, user_role="app_owner", **valid_token_dict
)
else:
return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
else:
raise Exception()
@ -2684,7 +2698,19 @@ class ProxyConfig:
"Error setting env variable: %s - %s", k, str(e)
)
# general_settings
# router settings
if llm_router is not None and prisma_client is not None:
db_router_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "router_settings"}
)
if (
db_router_settings is not None
and db_router_settings.param_value is not None
):
_router_settings = db_router_settings.param_value
llm_router.update_settings(**_router_settings)
## ALERTING ## [TODO] move this to the _update_general_settings() block
_general_settings = config_data.get("general_settings", {})
if "alerting" in _general_settings:
general_settings["alerting"] = _general_settings["alerting"]
@ -2708,17 +2734,24 @@ class ProxyConfig:
alert_to_webhook_url=general_settings["alert_to_webhook_url"]
)
# router settings
if llm_router is not None and prisma_client is not None:
db_router_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "router_settings"}
)
if (
db_router_settings is not None
and db_router_settings.param_value is not None
):
_router_settings = db_router_settings.param_value
llm_router.update_settings(**_router_settings)
async def _update_general_settings(self, db_general_settings: Optional[Json]):
"""
Pull from DB, read general settings value
"""
global general_settings
if db_general_settings is None:
return
_general_settings = dict(db_general_settings)
## MAX PARALLEL REQUESTS ##
if "max_parallel_requests" in _general_settings:
general_settings["max_parallel_requests"] = _general_settings[
"max_parallel_requests"
]
if "global_max_parallel_requests" in _general_settings:
general_settings["global_max_parallel_requests"] = _general_settings[
"global_max_parallel_requests"
]
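_update_general_settings copies only an allow-listed subset of the DB row into the in-memory general_settings, so a stored config row cannot clobber unrelated keys. A standalone sketch of that merge rule (the allow-list is exactly the two fields handled above):

from typing import Optional

ALLOWED_DB_GENERAL_SETTINGS = (
    "max_parallel_requests",
    "global_max_parallel_requests",
)

def merge_general_settings(current: dict, db_row: Optional[dict]) -> dict:
    # Copy only the allow-listed keys; ignore everything else in the DB row.
    if db_row is None:
        return current
    for key in ALLOWED_DB_GENERAL_SETTINGS:
        if key in db_row:
            current[key] = db_row[key]
    return current

settings = {"alerting": ["slack"], "max_parallel_requests": 10}
print(merge_general_settings(settings, {"global_max_parallel_requests": 100, "alerting": None}))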
async def add_deployment(
self,
@ -2726,7 +2759,7 @@ class ProxyConfig:
proxy_logging_obj: ProxyLogging,
):
"""
- Check db for new models (last 10 most recently updated)
- Check db for new models
- Check if model id's in router already
- If not, add to router
"""
@ -2739,9 +2772,21 @@ class ProxyConfig:
)
verbose_proxy_logger.debug(f"llm_router: {llm_router}")
new_models = await prisma_client.db.litellm_proxymodeltable.find_many()
# update llm router
await self._update_llm_router(
new_models=new_models, proxy_logging_obj=proxy_logging_obj
)
db_general_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "general_settings"}
)
# update general settings
if db_general_settings is not None:
await self._update_general_settings(
db_general_settings=db_general_settings.param_value,
)
except Exception as e:
verbose_proxy_logger.error(
"{}\nTraceback:{}".format(str(e), traceback.format_exc())
@ -2941,27 +2986,6 @@ async def generate_key_helper_fn(
data=key_data, table_name="key"
)
key_data["token_id"] = getattr(create_key_response, "token", None)
elif custom_db_client is not None:
if table_name is None or table_name == "user":
## CREATE USER (If necessary)
verbose_proxy_logger.debug(
"CustomDBClient: Creating User= %s", user_data
)
user_row = await custom_db_client.insert_data(
value=user_data, table_name="user"
)
if user_row is None:
# GET USER ROW
user_row = await custom_db_client.get_data(
key=user_id, table_name="user" # type: ignore
)
## use default user model list if no key-specific model list provided
if len(user_row.models) > 0 and len(key_data["models"]) == 0: # type: ignore
key_data["models"] = user_row.models
## CREATE KEY
verbose_proxy_logger.debug("CustomDBClient: Creating Key= %s", key_data)
await custom_db_client.insert_data(value=key_data, table_name="key")
except Exception as e:
traceback.print_exc()
if isinstance(e, HTTPException):
@ -3557,6 +3581,9 @@ async def chat_completion(
data["metadata"]["user_api_key_alias"] = getattr(
user_api_key_dict, "key_alias", None
)
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id
data["metadata"]["user_api_key_team_id"] = getattr(
@ -3746,8 +3773,11 @@ async def chat_completion(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
litellm_debug_info = getattr(e, "litellm_debug_info", "")
verbose_proxy_logger.debug(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
e,
litellm_debug_info,
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
@ -3795,6 +3825,7 @@ async def completion(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
data = {}
check_request_disconnected = None
try:
body = await request.body()
@ -3824,6 +3855,9 @@ async def completion(
data["metadata"]["user_api_key_team_id"] = getattr(
user_api_key_dict, "team_id", None
)
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_team_alias"] = getattr(
user_api_key_dict, "team_alias", None
)
@ -3904,6 +3938,9 @@ async def completion(
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
### ALERTING ###
data["litellm_status"] = "success" # used for alerting
verbose_proxy_logger.debug("final response: %s", response)
if (
"stream" in data and data["stream"] == True
@ -3934,10 +3971,15 @@ async def completion(
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
litellm_debug_info = getattr(e, "litellm_debug_info", "")
verbose_proxy_logger.debug(
"\033[1;31mAn error occurred: %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
e,
litellm_debug_info,
)
traceback.print_exc()
error_traceback = traceback.format_exc()
@ -4015,6 +4057,9 @@ async def embeddings(
data["metadata"]["user_api_key_alias"] = getattr(
user_api_key_dict, "key_alias", None
)
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
data["metadata"]["user_api_key_team_id"] = getattr(
user_api_key_dict, "team_id", None
@ -4140,6 +4185,12 @@ async def embeddings(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
litellm_debug_info = getattr(e, "litellm_debug_info", "")
verbose_proxy_logger.debug(
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
e,
litellm_debug_info,
)
traceback.print_exc()
if isinstance(e, HTTPException):
raise ProxyException(
@ -4213,6 +4264,9 @@ async def image_generation(
data["metadata"]["user_api_key_alias"] = getattr(
user_api_key_dict, "key_alias", None
)
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
data["metadata"]["user_api_key_team_id"] = getattr(
user_api_key_dict, "team_id", None
@ -4393,6 +4447,9 @@ async def audio_transcriptions(
data["metadata"]["user_api_key_team_id"] = getattr(
user_api_key_dict, "team_id", None
)
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_team_alias"] = getattr(
user_api_key_dict, "team_alias", None
)
@ -4590,6 +4647,9 @@ async def moderations(
"authorization", None
) # do not store the original `sk-..` api key in the db
data["metadata"]["headers"] = _headers
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
"global_max_parallel_requests", None
)
data["metadata"]["user_api_key_alias"] = getattr(
user_api_key_dict, "key_alias", None
)
@ -7697,7 +7757,12 @@ async def new_organization(
If none provided, create one based on provided values
"""
budget_row = LiteLLM_BudgetTable(**data.json(exclude_none=True))
budget_params = LiteLLM_BudgetTable.model_fields.keys()
# Only include Budget Params when creating an entry in litellm_budgettable
_json_data = data.json(exclude_none=True)
_budget_data = {k: v for k, v in _json_data.items() if k in budget_params}
budget_row = LiteLLM_BudgetTable(**_budget_data)
new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True))
@ -9288,7 +9353,7 @@ async def auth_callback(request: Request):
return RedirectResponse(url=litellm_dashboard_ui)
#### BASIC ENDPOINTS ####
#### CONFIG MANAGEMENT ####
@router.post(
"/config/update",
tags=["config.yaml"],
@ -9424,6 +9489,299 @@ async def update_config(config_info: ConfigYAML):
)
### CONFIG GENERAL SETTINGS
"""
- Update config settings
- Get config settings
Keep these endpoints precise, to avoid unintentionally overwriting other config values
"""
@router.post(
"/config/field/update",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
)
async def update_config_general_settings(
data: ConfigFieldUpdate,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Update a specific field in litellm general settings
"""
global prisma_client
## VALIDATION ##
"""
- Check if prisma_client is None
- Check if user allowed to call this endpoint (admin-only)
- Check if param in general settings
- Check if config value is valid type
"""
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
if user_api_key_dict.user_role != "proxy_admin":
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.not_allowed_access.value},
)
if data.field_name not in ConfigGeneralSettings.model_fields:
raise HTTPException(
status_code=400,
detail={"error": "Invalid field={} passed in.".format(data.field_name)},
)
try:
cgs = ConfigGeneralSettings(**{data.field_name: data.field_value})
except:
raise HTTPException(
status_code=400,
detail={
"error": "Invalid type of field value={} passed in.".format(
type(data.field_value),
)
},
)
## get general settings from db
db_general_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "general_settings"}
)
### update value
if db_general_settings is None or db_general_settings.param_value is None:
general_settings = {}
else:
general_settings = dict(db_general_settings.param_value)
## update db
general_settings[data.field_name] = data.field_value
response = await prisma_client.db.litellm_config.upsert(
where={"param_name": "general_settings"},
data={
"create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)}, # type: ignore
"update": {"param_value": json.dumps(general_settings)}, # type: ignore
},
)
return response
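For reference, a minimal client-side sketch of calling the new endpoint, assuming a locally running proxy at `http://localhost:4000` and an admin virtual key `sk-1234` (both placeholders, not values from this diff):

```python
# Hypothetical sketch: set a single general_settings field in the DB via the proxy.
# The proxy URL and admin key below are assumptions for illustration only.
import requests

PROXY_BASE = "http://localhost:4000"  # assumed proxy address
ADMIN_KEY = "sk-1234"                 # assumed admin virtual key

resp = requests.post(
    f"{PROXY_BASE}/config/field/update",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json={
        "field_name": "global_max_parallel_requests",  # must be a ConfigGeneralSettings field
        "field_value": 100,
    },
)
print(resp.status_code, resp.json())
```

The endpoint upserts only the `general_settings` row, so other config values stored in the DB are left untouched.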
@router.get(
"/config/field/info",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_config_general_settings(
field_name: str,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
global prisma_client
## VALIDATION ##
"""
- Check if prisma_client is None
- Check if user allowed to call this endpoint (admin-only)
- Check if param in general settings
"""
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
if user_api_key_dict.user_role != "proxy_admin":
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.not_allowed_access.value},
)
if field_name not in ConfigGeneralSettings.model_fields:
raise HTTPException(
status_code=400,
detail={"error": "Invalid field={} passed in.".format(field_name)},
)
## get general settings from db
db_general_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "general_settings"}
)
### pop the value
if db_general_settings is None or db_general_settings.param_value is None:
raise HTTPException(
status_code=400,
detail={"error": "Field name={} not in DB".format(field_name)},
)
else:
general_settings = dict(db_general_settings.param_value)
if field_name in general_settings:
return {
"field_name": field_name,
"field_value": general_settings[field_name],
}
else:
raise HTTPException(
status_code=400,
detail={"error": "Field name={} not in DB".format(field_name)},
)
@router.get(
"/config/list",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_config_list(
config_type: Literal["general_settings"],
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
) -> List[ConfigList]:
"""
List the available fields + current values for a given type of setting (currently just 'general_settings')
"""
global prisma_client, general_settings
## VALIDATION ##
"""
- Check if prisma_client is None
- Check if user allowed to call this endpoint (admin-only)
- Check if param in general settings
"""
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
if user_api_key_dict.user_role != "proxy_admin":
raise HTTPException(
status_code=400,
detail={
"error": "{}, your role={}".format(
CommonProxyErrors.not_allowed_access.value,
user_api_key_dict.user_role,
)
},
)
## get general settings from db
db_general_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "general_settings"}
)
if db_general_settings is not None and db_general_settings.param_value is not None:
db_general_settings_dict = dict(db_general_settings.param_value)
else:
db_general_settings_dict = {}
allowed_args = {
"max_parallel_requests": {"type": "Integer"},
"global_max_parallel_requests": {"type": "Integer"},
}
return_val = []
for field_name, field_info in ConfigGeneralSettings.model_fields.items():
if field_name in allowed_args:
_stored_in_db = None
if field_name in db_general_settings_dict:
_stored_in_db = True
elif field_name in general_settings:
_stored_in_db = False
_response_obj = ConfigList(
field_name=field_name,
field_type=allowed_args[field_name]["type"],
field_description=field_info.description or "",
field_value=general_settings.get(field_name, None),
stored_in_db=_stored_in_db,
)
return_val.append(_response_obj)
return return_val
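A companion sketch for reading values back, under the same assumed proxy URL and admin key as the earlier example:

```python
# Hypothetical sketch: read one field, then list all exposed general settings.
import requests

PROXY_BASE = "http://localhost:4000"  # assumed
ADMIN_KEY = "sk-1234"                 # assumed

# /config/field/info returns {"field_name": ..., "field_value": ...} when the field is stored in the DB
info = requests.get(
    f"{PROXY_BASE}/config/field/info",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    params={"field_name": "global_max_parallel_requests"},
)
print(info.json())

# /config/list returns a list of ConfigList entries:
# field_name, field_type, field_description, field_value, stored_in_db
listing = requests.get(
    f"{PROXY_BASE}/config/list",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    params={"config_type": "general_settings"},
)
for field in listing.json():
    print(field["field_name"], field["field_value"], field["stored_in_db"])
```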
@router.post(
"/config/field/delete",
tags=["config.yaml"],
dependencies=[Depends(user_api_key_auth)],
)
async def delete_config_general_settings(
data: ConfigFieldDelete,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Delete the DB-stored value of this field in litellm general settings. This resets it to its default value in litellm.
"""
global prisma_client
## VALIDATION ##
"""
- Check if prisma_client is None
- Check if user allowed to call this endpoint (admin-only)
- Check if param in general settings
"""
if prisma_client is None:
raise HTTPException(
status_code=400,
detail={"error": CommonProxyErrors.db_not_connected_error.value},
)
if user_api_key_dict.user_role != "proxy_admin":
raise HTTPException(
status_code=400,
detail={
"error": "{}, your role={}".format(
CommonProxyErrors.not_allowed_access.value,
user_api_key_dict.user_role,
)
},
)
if data.field_name not in ConfigGeneralSettings.model_fields:
raise HTTPException(
status_code=400,
detail={"error": "Invalid field={} passed in.".format(data.field_name)},
)
## get general settings from db
db_general_settings = await prisma_client.db.litellm_config.find_first(
where={"param_name": "general_settings"}
)
### pop the value
if db_general_settings is None or db_general_settings.param_value is None:
raise HTTPException(
status_code=400,
detail={"error": "Field name={} not in config".format(data.field_name)},
)
else:
general_settings = dict(db_general_settings.param_value)
## update db
general_settings.pop(data.field_name, None)
response = await prisma_client.db.litellm_config.upsert(
where={"param_name": "general_settings"},
data={
"create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)}, # type: ignore
"update": {"param_value": json.dumps(general_settings)}, # type: ignore
},
)
return response
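And a short sketch of clearing a DB override so the field falls back to whatever config.yaml / litellm defaults provide (same assumed proxy URL and admin key as the earlier sketches):

```python
# Hypothetical sketch: delete the DB-stored value for one general_settings field.
import requests

resp = requests.post(
    "http://localhost:4000/config/field/delete",   # assumed proxy address
    headers={"Authorization": "Bearer sk-1234"},   # assumed admin virtual key
    json={"field_name": "global_max_parallel_requests"},
)
print(resp.json())
```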
@router.get(
"/get/config/callbacks",
tags=["config.yaml"],
@ -9591,6 +9949,7 @@ async def config_yaml_endpoint(config_info: ConfigYAML):
return {"hello": "world"}
#### BASIC ENDPOINTS ####
@router.get(
"/test",
tags=["health"],

View file

@ -252,8 +252,8 @@ class ProxyLogging:
"""
Runs the CustomLogger's async_moderation_hook()
"""
for callback in litellm.callbacks:
new_data = copy.deepcopy(data)
for callback in litellm.callbacks:
try:
if isinstance(callback, CustomLogger):
await callback.async_moderation_hook(
@ -418,9 +418,14 @@ class ProxyLogging:
Related issue - https://github.com/BerriAI/litellm/issues/3395
"""
litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
exception_str = str(original_exception)
if litellm_debug_info is not None:
exception_str += litellm_debug_info
asyncio.create_task(
self.alerting_handler(
message=f"LLM API call failed: {str(original_exception)}",
message=f"LLM API call failed: {exception_str}",
level="High",
alert_type="llm_exceptions",
request_data=request_data,

View file

@ -262,13 +262,22 @@ class Router:
self.retry_after = retry_after
self.routing_strategy = routing_strategy
self.fallbacks = fallbacks or litellm.fallbacks
## SETTING FALLBACKS ##
### validate if it's set + in correct format
_fallbacks = fallbacks or litellm.fallbacks
self.validate_fallbacks(fallback_param=_fallbacks)
### set fallbacks
self.fallbacks = _fallbacks
if default_fallbacks is not None or litellm.default_fallbacks is not None:
_fallbacks = default_fallbacks or litellm.default_fallbacks
if self.fallbacks is not None:
self.fallbacks.append({"*": _fallbacks})
else:
self.fallbacks = [{"*": _fallbacks}]
self.context_window_fallbacks = (
context_window_fallbacks or litellm.context_window_fallbacks
)
@ -336,6 +345,21 @@ class Router:
if self.alerting_config is not None:
self._initialize_alerting()
def validate_fallbacks(self, fallback_param: Optional[List]):
if fallback_param is None:
return
if len(fallback_param) > 0: # if set
## for dictionary in list, check if only 1 key in dict
for _dict in fallback_param:
assert isinstance(_dict, dict), "Item={}, not a dictionary".format(
_dict
)
assert (
len(_dict.keys()) == 1
), "Only 1 key allows in dictionary. You set={} for dict={}".format(
len(_dict.keys()), _dict
)
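To make the accepted shape concrete, a minimal sketch (model names and params are placeholders, not taken from this diff):

```python
# Sketch of the fallback format validate_fallbacks() enforces.
from litellm import Router

model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}},
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
]

# Each fallback entry must be a dict with exactly one key:
# {primary_model_group: [fallback_model_groups]}
router = Router(
    model_list=model_list,
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],  # ok - one key per dict
)

# This would raise an AssertionError from validate_fallbacks(), since one dict has two keys:
# Router(model_list=model_list, fallbacks=[{"gpt-3.5-turbo": ["gpt-4"], "gpt-4": ["gpt-3.5-turbo"]}])
```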
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
if routing_strategy == "least-busy":
self.leastbusy_logger = LeastBusyLoggingHandler(
@ -1962,6 +1986,45 @@ class Router:
key=rpm_key, value=request_count, local_only=True
) # don't change existing ttl
def _is_cooldown_required(self, exception_status: Union[str, int]):
"""
A function to determine if a cooldown is required based on the exception status.
Parameters:
exception_status (Union[str, int]): The status of the exception.
Returns:
bool: True if a cooldown is required, False otherwise.
"""
try:
if isinstance(exception_status, str):
exception_status = int(exception_status)
if exception_status >= 400 and exception_status < 500:
if exception_status == 429:
# Cool down 429 Rate Limit Errors
return True
elif exception_status == 401:
# Cool down 401 Auth Errors
return True
elif exception_status == 408:
return True
else:
# Do NOT cool down all other 4XX Errors
return False
else:
# should cool down for all other errors
return True
except:
# Catch-all: if parsing the status fails, default to cooling down
return True
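A small sketch of the resulting decision table (statuses taken from the branches above; the Router config is a placeholder):

```python
# Sketch: which exception statuses trigger a deployment cooldown.
from litellm import Router

router = Router(
    model_list=[{"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}}]
)

assert router._is_cooldown_required(429)        # rate limit -> cool down
assert router._is_cooldown_required(401)        # auth error -> cool down
assert router._is_cooldown_required(408)        # request timeout -> cool down
assert not router._is_cooldown_required(400)    # other 4XX (e.g. BadRequest) -> do NOT cool down
assert router._is_cooldown_required("503")      # 5XX (string status is cast to int) -> cool down
```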
def _set_cooldown_deployments(
self, exception_status: Union[str, int], deployment: Optional[str] = None
):
@ -1975,6 +2038,9 @@ class Router:
if deployment is None:
return
if self._is_cooldown_required(exception_status=exception_status) == False:
return
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
# get current fails for deployment

View file

@ -27,7 +27,7 @@ class LiteLLMBase(BaseModel):
class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 * 60 # 1 hour
ttl: float = 1 * 60 * 60 # 1 hour
lowest_latency_buffer: float = 0
max_latency_list_size: int = 10

View file

@ -376,9 +376,8 @@ def test_vertex_ai_stream():
print("making request", model)
response = completion(
model=model,
messages=[
{"role": "user", "content": "write 10 line code code for saying hi"}
],
messages=[{"role": "user", "content": "hello tell me a short story"}],
max_tokens=15,
stream=True,
)
completed_str = ""

View file

@ -38,7 +38,7 @@ def reset_callbacks():
@pytest.mark.skip(reason="Local test")
def test_response_model_none():
"""
Addresses - https://github.com/BerriAI/litellm/issues/2972
Addresses: https://github.com/BerriAI/litellm/issues/2972
"""
x = completion(
model="mymodel",

View file

@ -5,7 +5,6 @@
import sys, os
import traceback
from dotenv import load_dotenv
from pydantic import ConfigDict
load_dotenv()
import os, io
@ -14,36 +13,21 @@ sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the, system path
import pytest, litellm
from pydantic import BaseModel, VERSION
from pydantic import BaseModel
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore
return model_config
class DBModel(BaseModel):
model_id: str
model_name: str
model_info: dict
litellm_params: dict
model_config = get_model_config()
class Config:
protected_namespaces = ()
@pytest.mark.asyncio

View file

@ -53,13 +53,6 @@ async def test_content_policy_exception_azure():
except litellm.ContentPolicyViolationError as e:
print("caught a content policy violation error! Passed")
print("exception", e)
# assert that the first 100 chars of the message is returned in the exception
assert (
"Messages: [{'role': 'user', 'content': 'where do I buy lethal drugs from'}]"
in str(e)
)
assert "Model: azure/chatgpt-v-2" in str(e)
pass
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@ -585,9 +578,6 @@ def test_router_completion_vertex_exception():
pytest.fail("Request should have failed - bad api key")
except Exception as e:
print("exception: ", e)
assert "Model: gemini-pro" in str(e)
assert "model_group: vertex-gemini-pro" in str(e)
assert "deployment: vertex_ai/gemini-pro" in str(e)
def test_litellm_completion_vertex_exception():
@ -604,8 +594,26 @@ def test_litellm_completion_vertex_exception():
pytest.fail("Request should have failed - bad api key")
except Exception as e:
print("exception: ", e)
assert "Model: gemini-pro" in str(e)
assert "vertex_project: bad-project" in str(e)
def test_litellm_predibase_exception():
"""
Test - Assert that the Predibase API Key is not returned on Authentication Errors
"""
try:
import litellm
litellm.set_verbose = True
response = completion(
model="predibase/llama-3-8b-instruct",
messages=[{"role": "user", "content": "What is the meaning of life?"}],
tenant_id="c4768f95",
api_key="hf-rawapikey",
)
pytest.fail("Request should have failed - bad api key")
except Exception as e:
assert "hf-rawapikey" not in str(e)
print("exception: ", e)
# # test_invalid_request_error(model="command-nightly")

View file

@ -105,6 +105,9 @@ def test_parallel_function_call(model):
# Step 4: send the info for each function call and function response to the model
for tool_call in tool_calls:
function_name = tool_call.function.name
if function_name not in available_functions:
# the model called a function that does not exist in available_functions - don't try calling anything
return
function_to_call = available_functions[function_name]
function_args = json.loads(tool_call.function.arguments)
function_response = function_to_call(
@ -124,7 +127,6 @@ def test_parallel_function_call(model):
model=model, messages=messages, temperature=0.2, seed=22
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
return second_response
except Exception as e:
pytest.fail(f"Error occurred: {e}")

View file

@ -1,7 +1,7 @@
#### What this tests ####
# Unit tests for JWT-Auth
import sys, os, asyncio, time, random
import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv
@ -24,6 +24,7 @@ public_key = {
"alg": "RS256",
}
def test_load_config_with_custom_role_names():
config = {
"general_settings": {
@ -77,7 +78,8 @@ async def test_token_single_public_key():
== "qIgOQfEVrrErJC0E7gsHXi6rs_V0nyFY5qPFui2-tv0o4CwpwDzgfBtLO7o_wLiguq0lnu54sMT2eLNoRiiPuLvv6bg7Iy1H9yc5_4Jf5oYEOrqN5o9ZBOoYp1q68Pv0oNJYyZdGu5ZJfd7V4y953vB2XfEKgXCsAkhVhlvIUMiDNKWoMDWsyb2xela5tRURZ2mJAXcHfSC_sYdZxIA2YYrIHfoevq_vTlaz0qVSe_uOKjEpgOAS08UUrgda4CQL11nzICiIQzc6qmjIQt2cjzB2D_9zb4BYndzEtfl0kwAT0z_I85S3mkwTqHU-1BvKe_4MG4VG3dAAeffLPXJyXQ"
)
@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_valid_invalid_token(audience):
"""
@ -90,7 +92,7 @@ async def test_valid_invalid_token(audience):
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.backends import default_backend
os.environ.pop('JWT_AUDIENCE', None)
os.environ.pop("JWT_AUDIENCE", None)
if audience:
os.environ["JWT_AUDIENCE"] = audience
@ -138,7 +140,7 @@ async def test_valid_invalid_token(audience):
"sub": "user123",
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm-proxy-admin",
"aud": audience
"aud": audience,
}
# Generate the JWT token
@ -166,7 +168,7 @@ async def test_valid_invalid_token(audience):
"sub": "user123",
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm-NO-SCOPE",
"aud": audience
"aud": audience,
}
# Generate the JWT token
@ -183,6 +185,7 @@ async def test_valid_invalid_token(audience):
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
@pytest.fixture
def prisma_client():
import litellm
@ -205,7 +208,7 @@ def prisma_client():
return prisma_client
@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_team_token_output(prisma_client, audience):
import jwt, json
@ -222,7 +225,7 @@ async def test_team_token_output(prisma_client, audience):
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
await litellm.proxy.proxy_server.prisma_client.connect()
os.environ.pop('JWT_AUDIENCE', None)
os.environ.pop("JWT_AUDIENCE", None)
if audience:
os.environ["JWT_AUDIENCE"] = audience
@ -261,7 +264,7 @@ async def test_team_token_output(prisma_client, audience):
jwt_handler.user_api_key_cache = cache
jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth()
jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")
# VALID TOKEN
## GENERATE A TOKEN
@ -274,7 +277,7 @@ async def test_team_token_output(prisma_client, audience):
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm_team",
"client_id": team_id,
"aud": audience
"aud": audience,
}
# Generate the JWT token
@ -289,7 +292,7 @@ async def test_team_token_output(prisma_client, audience):
"sub": "user123",
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm_proxy_admin",
"aud": audience
"aud": audience,
}
admin_token = jwt.encode(payload, private_key_str, algorithm="RS256")
@ -315,7 +318,13 @@ async def test_team_token_output(prisma_client, audience):
## 1. INITIAL TEAM CALL - should fail
# use generated key to auth in
setattr(litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True})
setattr(
litellm.proxy.proxy_server,
"general_settings",
{
"enable_jwt_auth": True,
},
)
setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler)
try:
result = await user_api_key_auth(request=request, api_key=bearer_token)
@ -358,9 +367,22 @@ async def test_team_token_output(prisma_client, audience):
assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"]
@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.parametrize(
"team_id_set, default_team_id",
[(True, False), (False, True)],
)
@pytest.mark.parametrize("user_id_upsert", [True, False])
@pytest.mark.asyncio
async def test_user_token_output(prisma_client, audience):
async def test_user_token_output(
prisma_client, audience, team_id_set, default_team_id, user_id_upsert
):
import uuid
args = locals()
print(f"received args - {args}")
if default_team_id:
default_team_id = "team_id_12344_{}".format(uuid.uuid4())
"""
- If user required, check if it exists
- fail initial request (when user doesn't exist)
@ -373,7 +395,12 @@ async def test_user_token_output(prisma_client, audience):
from cryptography.hazmat.backends import default_backend
from fastapi import Request
from starlette.datastructures import URL
from litellm.proxy.proxy_server import user_api_key_auth, new_team, new_user
from litellm.proxy.proxy_server import (
user_api_key_auth,
new_team,
new_user,
user_info,
)
from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth, NewUserRequest
import litellm
import uuid
@ -381,7 +408,7 @@ async def test_user_token_output(prisma_client, audience):
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
await litellm.proxy.proxy_server.prisma_client.connect()
os.environ.pop('JWT_AUDIENCE', None)
os.environ.pop("JWT_AUDIENCE", None)
if audience:
os.environ["JWT_AUDIENCE"] = audience
@ -423,6 +450,11 @@ async def test_user_token_output(prisma_client, audience):
jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth()
jwt_handler.litellm_jwtauth.user_id_jwt_field = "sub"
jwt_handler.litellm_jwtauth.team_id_default = default_team_id
jwt_handler.litellm_jwtauth.user_id_upsert = user_id_upsert
if team_id_set:
jwt_handler.litellm_jwtauth.team_id_jwt_field = "client_id"
# VALID TOKEN
## GENERATE A TOKEN
@ -436,7 +468,7 @@ async def test_user_token_output(prisma_client, audience):
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm_team",
"client_id": team_id,
"aud": audience
"aud": audience,
}
# Generate the JWT token
@ -451,7 +483,7 @@ async def test_user_token_output(prisma_client, audience):
"sub": user_id,
"exp": expiration_time, # set the token to expire in 10 minutes
"scope": "litellm_proxy_admin",
"aud": audience
"aud": audience,
}
admin_token = jwt.encode(payload, private_key_str, algorithm="RS256")
@ -503,6 +535,16 @@ async def test_user_token_output(prisma_client, audience):
),
user_api_key_dict=result,
)
if default_team_id:
await new_team(
data=NewTeamRequest(
team_id=default_team_id,
tpm_limit=100,
rpm_limit=99,
models=["gpt-3.5-turbo", "gpt-4"],
),
user_api_key_dict=result,
)
except Exception as e:
pytest.fail(f"This should not fail - {str(e)}")
@ -513,11 +555,23 @@ async def test_user_token_output(prisma_client, audience):
team_result: UserAPIKeyAuth = await user_api_key_auth(
request=request, api_key=bearer_token
)
if user_id_upsert == False:
pytest.fail(f"User doesn't exist. this should fail")
except Exception as e:
pass
## 4. Create user
if user_id_upsert:
## check if user already exists
try:
bearer_token = "Bearer " + admin_token
request._url = URL(url="/team/new")
result = await user_api_key_auth(request=request, api_key=bearer_token)
await user_info(user_id=user_id)
except Exception as e:
pytest.fail(f"This should not fail - {str(e)}")
else:
try:
bearer_token = "Bearer " + admin_token
@ -543,6 +597,7 @@ async def test_user_token_output(prisma_client, audience):
## 6. ASSERT USER_API_KEY_AUTH format (used for tpm/rpm limiting in parallel_request_limiter.py AND cost tracking)
if team_id_set or default_team_id is not None:
assert team_result.team_tpm_limit == 100
assert team_result.team_rpm_limit == 99
assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"]

View file

@ -705,7 +705,7 @@ async def test_lowest_latency_routing_first_pick():
) # type: ignore
deployments = {}
for _ in range(5):
for _ in range(10):
response = await router.acompletion(
model="azure-model", messages=[{"role": "user", "content": "hello"}]
)

View file

@ -28,6 +28,37 @@ from datetime import datetime
## On Request failure
@pytest.mark.asyncio
async def test_global_max_parallel_requests():
"""
Test that MaxParallelRequestsHandler respects 'global_max_parallel_requests' passed in via data["metadata"]["global_max_parallel_requests"]
"""
global_max_parallel_requests = 0
_api_key = "sk-12345"
_api_key = hash_token("sk-12345")
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=100)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler()
for _ in range(3):
try:
await parallel_request_handler.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=local_cache,
data={
"metadata": {
"global_max_parallel_requests": global_max_parallel_requests
}
},
call_type="",
)
pytest.fail("Expected call to fail")
except Exception as e:
pass
@pytest.mark.asyncio
async def test_pre_call_hook():
"""

View file

@ -0,0 +1,64 @@
#### What this tests ####
# This tests calling router with fallback models
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger
import openai, httpx
@pytest.mark.asyncio
async def test_cooldown_badrequest_error():
"""
Test 1. The router SHOULD NOT cool down a deployment on a BadRequestError
"""
router = litellm.Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
}
],
debug_level="DEBUG",
set_verbose=True,
cooldown_time=300,
num_retries=0,
allowed_fails=0,
)
# Act & Assert
try:
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "gm"}],
bad_param=200,
)
except:
pass
await asyncio.sleep(3) # wait for deployment to get cooled-down
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "gm"}],
mock_response="hello",
)
assert response is not None
print(response)

View file

@ -82,7 +82,7 @@ def test_async_fallbacks(caplog):
# Define the expected log messages
# - error request, falling back notice, success notice
expected_logs = [
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}} \nModel: gpt-3.5-turbo\nAPI Base: https://api.openai.com\nMessages: [{'content': 'Hello, how are you?', 'role': 'user'}]\nmodel_group: gpt-3.5-turbo\n\ndeployment: gpt-3.5-turbo\n\x1b[0m",
"litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
"Falling back to model_group = azure/gpt-3.5-turbo",
"litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
"Successful fallback b/w models.",

View file

@ -1,27 +1,10 @@
from typing import List, Optional, Union, Iterable, cast
from typing import List, Optional, Union, Iterable
from pydantic import ConfigDict, BaseModel, validator, VERSION
from pydantic import BaseModel, validator
from typing_extensions import Literal, Required, TypedDict
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config() -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow) # type: ignore
return model_config
class ChatCompletionSystemMessageParam(TypedDict, total=False):
content: Required[str]
"""The contents of the system message."""
@ -208,4 +191,6 @@ class CompletionRequest(BaseModel):
api_key: Optional[str] = None
model_list: Optional[List[str]] = None
model_config = get_model_config()
class Config:
extra = "allow"
protected_namespaces = ()

View file

@ -1,23 +1,6 @@
from typing import List, Optional, Union
from pydantic import ConfigDict, BaseModel, validator, VERSION
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore
return model_config
from pydantic import BaseModel, validator
class EmbeddingRequest(BaseModel):
@ -34,4 +17,7 @@ class EmbeddingRequest(BaseModel):
litellm_call_id: Optional[str] = None
litellm_logging_obj: Optional[dict] = None
logger_fn: Optional[str] = None
model_config = get_model_config()
class Config:
# allow kwargs
extra = "allow"

View file

@ -1,42 +1,19 @@
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import httpx
from pydantic import (
ConfigDict,
BaseModel,
validator,
Field,
__version__ as pydantic_version,
VERSION,
)
from pydantic import BaseModel, validator, Field
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
import uuid, enum
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore
return model_config
class ModelConfig(BaseModel):
model_name: str
litellm_params: Union[CompletionRequest, EmbeddingRequest]
tpm: int
rpm: int
model_config = get_model_config()
class Config:
protected_namespaces = ()
class RouterConfig(BaseModel):
@ -67,7 +44,8 @@ class RouterConfig(BaseModel):
"latency-based-routing",
] = "simple-shuffle"
model_config = get_model_config()
class Config:
protected_namespaces = ()
class UpdateRouterConfig(BaseModel):
@ -87,7 +65,8 @@ class UpdateRouterConfig(BaseModel):
fallbacks: Optional[List[dict]] = None
context_window_fallbacks: Optional[List[dict]] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
class ModelInfo(BaseModel):
@ -105,7 +84,8 @@ class ModelInfo(BaseModel):
id = str(id)
super().__init__(id=id, **params)
model_config = get_model_config()
class Config:
extra = "allow"
def __contains__(self, key):
# Define custom behavior for the 'in' operator
@ -200,14 +180,8 @@ class GenericLiteLLMParams(BaseModel):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
model_config = get_model_config(arbitrary_types_allowed=True)
if pydantic_version.startswith("1"):
# pydantic v2 warns about using a Config class.
# But without this, pydantic v1 will raise an error:
# RuntimeError: no validator found for <class 'openai.Timeout'>,
# see `arbitrary_types_allowed` in Config
# Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1.
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
@ -267,15 +241,8 @@ class LiteLLM_Params(GenericLiteLLMParams):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **args, **params)
model_config = get_model_config(arbitrary_types_allowed=True)
if pydantic_version.startswith("1"):
# pydantic v2 warns about using a Config class.
# But without this, pydantic v1 will raise an error:
# RuntimeError: no validator found for <class 'openai.Timeout'>,
# see `arbitrary_types_allowed` in Config
# Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1.
class Config:
extra = "allow"
arbitrary_types_allowed = True
def __contains__(self, key):
@ -306,7 +273,8 @@ class updateDeployment(BaseModel):
litellm_params: Optional[updateLiteLLMParams] = None
model_info: Optional[ModelInfo] = None
model_config = get_model_config()
class Config:
protected_namespaces = ()
class LiteLLMParamsTypedDict(TypedDict, total=False):
@ -380,7 +348,9 @@ class Deployment(BaseModel):
# if using pydantic v1
return self.dict(**kwargs)
model_config = get_model_config()
class Config:
extra = "allow"
protected_namespaces = ()
def __contains__(self, key):
# Define custom behavior for the 'in' operator

View file

@ -19,7 +19,7 @@ from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
from pydantic import ConfigDict, BaseModel, VERSION
from pydantic import BaseModel
import aiohttp
import textwrap
import logging
@ -185,23 +185,6 @@ last_fetched_at_keys = None
# }
# Function to get Pydantic version
def is_pydantic_v2() -> int:
return int(VERSION.split(".")[0])
def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
# Version-specific configuration
if is_pydantic_v2() >= 2:
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=()) # type: ignore
else:
from pydantic import Extra
model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed) # type: ignore
return model_config
class UnsupportedParamsError(Exception):
def __init__(self, status_code, message):
self.status_code = status_code
@ -348,7 +331,10 @@ class HiddenParams(OpenAIObject):
original_response: Optional[str] = None
model_id: Optional[str] = None # used in Router for individual deployments
api_base: Optional[str] = None # returns api base used for making completion call
model_config = get_model_config()
class Config:
extra = "allow"
protected_namespaces = ()
def get(self, key, default=None):
# Custom .get() method to access attributes with a default value if the attribute doesn't exist
@ -2579,7 +2565,7 @@ class Logging:
response_obj=result,
start_time=start_time,
end_time=end_time,
)
) # type: ignore
if callable(callback): # custom logger functions
await customLogger.async_log_event(
kwargs=self.model_call_details,
@ -6778,7 +6764,7 @@ def get_max_tokens(model: str):
raise Exception()
except:
raise Exception(
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
)
@ -8155,8 +8141,11 @@ def exception_type(
# Common Extra information needed for all providers
# We pass num retries, api_base, vertex_deployment etc to the exception here
################################################################################
_api_base = litellm.get_api_base(model=model, optional_params=extra_kwargs)
extra_information = ""
try:
_api_base = litellm.get_api_base(
model=model, optional_params=extra_kwargs
)
messages = litellm.get_first_chars_messages(kwargs=completion_kwargs)
_vertex_project = extra_kwargs.get("vertex_project")
_vertex_location = extra_kwargs.get("vertex_location")
@ -8182,6 +8171,9 @@ def exception_type(
extra_information = _add_key_name_and_team_to_alert(
request_info=extra_information, metadata=_metadata
)
except:
# DO NOT LET this Block raising the original exception
pass
################################################################################
# End of Common Extra information Needed for all providers
@ -8194,9 +8186,10 @@ def exception_type(
if "Request Timeout Error" in error_str or "Request timed out" in error_str:
exception_mapping_worked = True
raise Timeout(
message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}",
message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}",
model=model,
llm_provider=custom_llm_provider,
litellm_debug_info=extra_information,
)
if (
@ -8226,10 +8219,11 @@ def exception_type(
if "This model's maximum context length is" in error_str:
exception_mapping_worked = True
raise ContextWindowExceededError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif (
"invalid_request_error" in error_str
@ -8237,10 +8231,11 @@ def exception_type(
):
exception_mapping_worked = True
raise NotFoundError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif (
"invalid_request_error" in error_str
@ -8248,10 +8243,11 @@ def exception_type(
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif (
"invalid_request_error" in error_str
@ -8259,17 +8255,19 @@ def exception_type(
):
exception_mapping_worked = True
raise BadRequestError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif "Request too large" in error_str:
raise RateLimitError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif (
"The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
@ -8277,10 +8275,11 @@ def exception_type(
):
exception_mapping_worked = True
raise AuthenticationError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif "Mistral API raised a streaming error" in error_str:
exception_mapping_worked = True
@ -8289,82 +8288,92 @@ def exception_type(
)
raise APIError(
status_code=500,
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
request=_request,
litellm_debug_info=extra_information,
)
elif hasattr(original_exception, "status_code"):
exception_mapping_worked = True
if original_exception.status_code == 401:
exception_mapping_worked = True
raise AuthenticationError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 404:
exception_mapping_worked = True
raise NotFoundError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 408:
exception_mapping_worked = True
raise Timeout(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 422:
exception_mapping_worked = True
raise BadRequestError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 429:
exception_mapping_worked = True
raise RateLimitError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 503:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif original_exception.status_code == 504: # gateway timeout error
exception_mapping_worked = True
raise Timeout(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
model=model,
llm_provider=custom_llm_provider,
litellm_debug_info=extra_information,
)
else:
exception_mapping_worked = True
raise APIError(
status_code=original_exception.status_code,
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
request=original_exception.request,
litellm_debug_info=extra_information,
)
else:
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
raise APIConnectionError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider=custom_llm_provider,
model=model,
litellm_debug_info=extra_information,
request=httpx.Request(
method="POST", url="https://api.openai.com/v1/"
),
@ -8529,6 +8538,28 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif custom_llm_provider == "predibase":
if "authorization denied for" in error_str:
exception_mapping_worked = True
# Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception
if (
error_str is not None
and isinstance(error_str, str)
and "bearer" in error_str.lower()
):
# only keep the first few chars after the occurrence of "bearer", mask the rest of the key
_bearer_token_start_index = error_str.lower().find("bearer")
error_str = error_str[: _bearer_token_start_index + 14]
error_str += "XXXXXXX" + '"'
raise AuthenticationError(
message=f"PredibaseException: Authentication Error - {error_str}",
llm_provider="predibase",
model=model,
response=original_exception.response,
litellm_debug_info=extra_information,
)
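To illustrate what the redaction above produces, a standalone sketch (the error string is invented for illustration only):

```python
# Standalone sketch of the bearer-token redaction logic above.
error_str = 'authorization denied for Bearer pb_supersecretapikey1234: invalid token'

if "bearer" in error_str.lower():
    _bearer_token_start_index = error_str.lower().find("bearer")
    # keep "Bearer " plus only the first few characters of the key, then mask the rest
    error_str = error_str[: _bearer_token_start_index + 14]
    error_str += "XXXXXXX" + '"'

print(error_str)  # -> authorization denied for Bearer pb_supeXXXXXXX"
```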
elif custom_llm_provider == "bedrock":
if (
"too many tokens" in error_str
@ -8666,10 +8697,11 @@ def exception_type(
):
exception_mapping_worked = True
raise BadRequestError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif (
"None Unknown Error." in error_str
@ -8677,26 +8709,29 @@ def exception_type(
):
exception_mapping_worked = True
raise APIError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
status_code=500,
model=model,
llm_provider="vertex_ai",
request=original_exception.request,
litellm_debug_info=extra_information,
)
elif "403" in error_str:
exception_mapping_worked = True
raise BadRequestError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=original_exception.response,
litellm_debug_info=extra_information,
)
elif "The response was blocked." in error_str:
exception_mapping_worked = True
raise UnprocessableEntityError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
response=httpx.Response(
status_code=429,
request=httpx.Request(
@ -8713,9 +8748,10 @@ def exception_type(
):
exception_mapping_worked = True
raise RateLimitError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
response=httpx.Response(
status_code=429,
request=httpx.Request(
@ -8728,18 +8764,20 @@ def exception_type(
if original_exception.status_code == 400:
exception_mapping_worked = True
raise BadRequestError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
response=original_exception.response,
)
if original_exception.status_code == 500:
exception_mapping_worked = True
raise APIError(
message=f"VertexAIException - {error_str} {extra_information}",
message=f"VertexAIException - {error_str}",
status_code=500,
model=model,
llm_provider="vertex_ai",
litellm_debug_info=extra_information,
request=original_exception.request,
)
elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
@ -9340,25 +9378,28 @@ def exception_type(
exception_mapping_worked = True
raise APIError(
status_code=500,
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
request=httpx.Request(method="POST", url="https://openai.com/"),
)
elif "This model's maximum context length is" in error_str:
exception_mapping_worked = True
raise ContextWindowExceededError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif "DeploymentNotFound" in error_str:
exception_mapping_worked = True
raise NotFoundError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif (
@ -9370,17 +9411,19 @@ def exception_type(
):
exception_mapping_worked = True
raise ContentPolicyViolationError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif "invalid_request_error" in error_str:
exception_mapping_worked = True
raise BadRequestError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif (
@ -9389,9 +9432,10 @@ def exception_type(
):
exception_mapping_worked = True
raise AuthenticationError(
message=f"{exception_provider} - {original_exception.message} {extra_information}",
message=f"{exception_provider} - {original_exception.message}",
llm_provider=custom_llm_provider,
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif hasattr(original_exception, "status_code"):
@ -9399,55 +9443,62 @@ def exception_type(
if original_exception.status_code == 401:
exception_mapping_worked = True
raise AuthenticationError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif original_exception.status_code == 408:
exception_mapping_worked = True
raise Timeout(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
model=model,
litellm_debug_info=extra_information,
llm_provider="azure",
)
if original_exception.status_code == 422:
exception_mapping_worked = True
raise BadRequestError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
model=model,
llm_provider="azure",
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif original_exception.status_code == 429:
exception_mapping_worked = True
raise RateLimitError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
model=model,
llm_provider="azure",
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif original_exception.status_code == 503:
exception_mapping_worked = True
raise ServiceUnavailableError(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
model=model,
llm_provider="azure",
litellm_debug_info=extra_information,
response=original_exception.response,
)
elif original_exception.status_code == 504: # gateway timeout error
exception_mapping_worked = True
raise Timeout(
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
model=model,
litellm_debug_info=extra_information,
llm_provider="azure",
)
else:
exception_mapping_worked = True
raise APIError(
status_code=original_exception.status_code,
message=f"AzureException - {original_exception.message} {extra_information}",
message=f"AzureException - {original_exception.message}",
llm_provider="azure",
litellm_debug_info=extra_information,
model=model,
request=httpx.Request(
method="POST", url="https://openai.com/"
@ -9456,9 +9507,10 @@ def exception_type(
else:
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
raise APIConnectionError(
message=f"{exception_provider} - {message} {extra_information}",
message=f"{exception_provider} - {message}",
llm_provider="azure",
model=model,
litellm_debug_info=extra_information,
request=httpx.Request(method="POST", url="https://openai.com/"),
)
if (

View file

@ -1744,6 +1744,30 @@
"litellm_provider": "openrouter",
"mode": "chat"
},
"openrouter/openai/gpt-4o": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"openrouter/openai/gpt-4o-2024-05-13": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_vision": true
},
"openrouter/openai/gpt-4-vision-preview": {
"max_tokens": 130000,
"input_cost_per_token": 0.00001,
@ -2943,6 +2967,24 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/llama3": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/llama3:70b": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@ -2952,6 +2994,42 @@
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/mistral-7B-Instruct-v0.1": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mistral-7B-Instruct-v0.2": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x7B-Instruct-v0.1": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/mixtral-8x22B-Instruct-v0.1": {
"max_tokens": 65536,
"max_input_tokens": 65536,
"max_output_tokens": 65536,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "chat"
},
"ollama/codellama": {
"max_tokens": 4096,
"max_input_tokens": 4096,

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.37.10"
version = "1.37.12"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.37.10"
version = "1.37.12"
version_files = [
"pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[7926,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-6a39771cacf75ea6.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"obp5wqVSVDMiDTC414cR8\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-de9c0fadf6a94b3b.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f04e46b02318b660.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[4858,[\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"884\",\"static/chunks/884-7576ee407a2ecbe6.js\",\"931\",\"static/chunks/app/page-c35c14c9afd091ec.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f04e46b02318b660.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"2ASoJGxS-D4w-vat00xMy\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[7926,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-6a39771cacf75ea6.js"],""]
3:I[4858,["936","static/chunks/2f6dbc85-052c4579f80d66ae.js","884","static/chunks/884-7576ee407a2ecbe6.js","931","static/chunks/app/page-c35c14c9afd091ec.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["obp5wqVSVDMiDTC414cR8",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["2ASoJGxS-D4w-vat00xMy",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f04e46b02318b660.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -23,12 +23,44 @@ import {
AccordionHeader,
AccordionList,
} from "@tremor/react";
import { TabPanel, TabPanels, TabGroup, TabList, Tab, Icon } from "@tremor/react";
import { getCallbacksCall, setCallbacksCall, serviceHealthCheck } from "./networking";
import { Modal, Form, Input, Select, Button as Button2, message } from "antd";
import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon } from "@heroicons/react/outline";
import {
TabPanel,
TabPanels,
TabGroup,
TabList,
Tab,
Icon,
} from "@tremor/react";
import {
getCallbacksCall,
setCallbacksCall,
getGeneralSettingsCall,
serviceHealthCheck,
updateConfigFieldSetting,
deleteConfigFieldSetting,
} from "./networking";
import {
Modal,
Form,
Input,
Select,
Button as Button2,
message,
InputNumber,
} from "antd";
import {
InformationCircleIcon,
PencilAltIcon,
PencilIcon,
StatusOnlineIcon,
TrashIcon,
RefreshIcon,
CheckCircleIcon,
XCircleIcon,
QuestionMarkCircleIcon,
} from "@heroicons/react/outline";
import StaticGenerationSearchParamsBailoutProvider from "next/dist/client/components/static-generation-searchparams-bailout-provider";
import AddFallbacks from "./add_fallbacks"
import AddFallbacks from "./add_fallbacks";
import openai from "openai";
import Paragraph from "antd/es/skeleton/Paragraph";
@ -36,7 +68,7 @@ interface GeneralSettingsPageProps {
accessToken: string | null;
userRole: string | null;
userID: string | null;
modelData: any
modelData: any;
}
async function testFallbackModelResponse(
@ -65,24 +97,39 @@ async function testFallbackModelResponse(
},
],
// @ts-ignore
mock_testing_fallbacks: true
mock_testing_fallbacks: true,
});
message.success(
<span>
Test model=<strong>{selectedModel}</strong>, received model=<strong>{response.model}</strong>.
See <a href="#" onClick={() => window.open('https://docs.litellm.ai/docs/proxy/reliability', '_blank')} style={{ textDecoration: 'underline', color: 'blue' }}>curl</a>
Test model=<strong>{selectedModel}</strong>, received model=
<strong>{response.model}</strong>. See{" "}
<a
href="#"
onClick={() =>
window.open(
"https://docs.litellm.ai/docs/proxy/reliability",
"_blank"
)
}
style={{ textDecoration: "underline", color: "blue" }}
>
curl
</a>
</span>
);
} catch (error) {
message.error(`Error occurred while generating model response. Please try again. Error: ${error}`, 20);
message.error(
`Error occurred while generating model response. Please try again. Error: ${error}`,
20
);
}
}
interface AccordionHeroProps {
selectedStrategy: string | null;
strategyArgs: routingStrategyArgs;
paramExplanation: { [key: string]: string }
paramExplanation: { [key: string]: string };
}
interface routingStrategyArgs {
@ -90,17 +137,30 @@ interface routingStrategyArgs {
lowest_latency_buffer?: number;
}
const defaultLowestLatencyArgs: routingStrategyArgs = {
"ttl": 3600,
"lowest_latency_buffer": 0
interface generalSettingsItem {
field_name: string;
field_type: string;
field_value: any;
field_description: string;
stored_in_db: boolean | null;
}
export const AccordionHero: React.FC<AccordionHeroProps> = ({ selectedStrategy, strategyArgs, paramExplanation }) => (
const defaultLowestLatencyArgs: routingStrategyArgs = {
ttl: 3600,
lowest_latency_buffer: 0,
};
export const AccordionHero: React.FC<AccordionHeroProps> = ({
selectedStrategy,
strategyArgs,
paramExplanation,
}) => (
<Accordion>
<AccordionHeader className="text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong">Routing Strategy Specific Args</AccordionHeader>
<AccordionHeader className="text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong">
Routing Strategy Specific Args
</AccordionHeader>
<AccordionBody>
{
selectedStrategy == "latency-based-routing" ?
{selectedStrategy == "latency-based-routing" ? (
<Card>
<Table>
<TableHead>
@ -114,13 +174,24 @@ export const AccordionHero: React.FC<AccordionHeroProps> = ({ selectedStrategy,
<TableRow key={param}>
<TableCell>
<Text>{param}</Text>
<p style={{fontSize: '0.65rem', color: '#808080', fontStyle: 'italic'}} className="mt-1">{paramExplanation[param]}</p>
<p
style={{
fontSize: "0.65rem",
color: "#808080",
fontStyle: "italic",
}}
className="mt-1"
>
{paramExplanation[param]}
</p>
</TableCell>
<TableCell>
<TextInput
name={param}
defaultValue={
typeof value === 'object' ? JSON.stringify(value, null, 2) : value.toString()
typeof value === "object"
? JSON.stringify(value, null, 2)
: value.toString()
}
/>
</TableCell>
@ -129,8 +200,9 @@ export const AccordionHero: React.FC<AccordionHeroProps> = ({ selectedStrategy,
</TableBody>
</Table>
</Card>
: <Text>No specific settings</Text>
}
) : (
<Text>No specific settings</Text>
)}
</AccordionBody>
</Accordion>
);
@ -139,26 +211,38 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
accessToken,
userRole,
userID,
modelData
modelData,
}) => {
const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>({});
const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>(
{}
);
const [generalSettingsDict, setGeneralSettingsDict] = useState<{
[key: string]: any;
}>({});
const [generalSettings, setGeneralSettings] = useState<generalSettingsItem[]>(
[]
);
const [isModalVisible, setIsModalVisible] = useState(false);
const [form] = Form.useForm();
const [selectedCallback, setSelectedCallback] = useState<string | null>(null);
const [selectedStrategy, setSelectedStrategy] = useState<string | null>(null)
const [strategySettings, setStrategySettings] = useState<routingStrategyArgs | null>(null);
const [selectedStrategy, setSelectedStrategy] = useState<string | null>(null);
const [strategySettings, setStrategySettings] =
useState<routingStrategyArgs | null>(null);
let paramExplanation: { [key: string]: string } = {
"routing_strategy_args": "(dict) Arguments to pass to the routing strategy",
"routing_strategy": "(string) Routing strategy to use",
"allowed_fails": "(int) Number of times a deployment can fail before being added to cooldown",
"cooldown_time": "(int) time in seconds to cooldown a deployment after failure",
"num_retries": "(int) Number of retries for failed requests. Defaults to 0.",
"timeout": "(float) Timeout for requests. Defaults to None.",
"retry_after": "(int) Minimum time to wait before retrying a failed request",
"ttl": "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",
"lowest_latency_buffer": "(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency)."
}
routing_strategy_args: "(dict) Arguments to pass to the routing strategy",
routing_strategy: "(string) Routing strategy to use",
allowed_fails:
"(int) Number of times a deployment can fail before being added to cooldown",
cooldown_time:
"(int) time in seconds to cooldown a deployment after failure",
num_retries: "(int) Number of retries for failed requests. Defaults to 0.",
timeout: "(float) Timeout for requests. Defaults to None.",
retry_after: "(int) Minimum time to wait before retrying a failed request",
ttl: "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",
lowest_latency_buffer:
"(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency).",
};
useEffect(() => {
if (!accessToken || !userRole || !userID) {
@ -169,6 +253,10 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
let router_settings = data.router_settings;
setRouterSettings(router_settings);
});
getGeneralSettingsCall(accessToken).then((data) => {
let general_settings = data;
setGeneralSettings(general_settings);
});
}, [accessToken, userRole, userID]);
const handleAddCallback = () => {
@ -190,8 +278,8 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
return;
}
console.log(`received key: ${key}`)
console.log(`routerSettings['fallbacks']: ${routerSettings['fallbacks']}`)
console.log(`received key: ${key}`);
console.log(`routerSettings['fallbacks']: ${routerSettings["fallbacks"]}`);
routerSettings["fallbacks"].map((dict: { [key: string]: any }) => {
// Check if the dictionary has the specified key and delete it if present
@ -202,19 +290,74 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
});
const payload = {
router_settings: routerSettings
router_settings: routerSettings,
};
try {
await setCallbacksCall(accessToken, payload);
setRouterSettings({ ...routerSettings });
setSelectedStrategy(routerSettings["routing_strategy"])
setSelectedStrategy(routerSettings["routing_strategy"]);
message.success("Router settings updated successfully");
} catch (error) {
message.error("Failed to update router settings: " + error, 20);
}
};
const handleInputChange = (fieldName: string, newValue: any) => {
// Update the value in the state
const updatedSettings = generalSettings.map((setting) =>
setting.field_name === fieldName
? { ...setting, field_value: newValue }
: setting
);
setGeneralSettings(updatedSettings);
};
const handleUpdateField = (fieldName: string, idx: number) => {
if (!accessToken) {
return;
}
let fieldValue = generalSettings[idx].field_value;
if (fieldValue == null || fieldValue == undefined) {
return;
}
try {
updateConfigFieldSetting(accessToken, fieldName, fieldValue);
// update value in state
const updatedSettings = generalSettings.map((setting) =>
setting.field_name === fieldName
? { ...setting, stored_in_db: true }
: setting
);
setGeneralSettings(updatedSettings);
} catch (error) {
// errors are surfaced by updateConfigFieldSetting itself (message.error / console.error), so nothing more to do here
}
};
const handleResetField = (fieldName: string, idx: number) => {
if (!accessToken) {
return;
}
try {
deleteConfigFieldSetting(accessToken, fieldName);
// update value in state
const updatedSettings = generalSettings.map((setting) =>
setting.field_name === fieldName
? { ...setting, stored_in_db: null, field_value: null }
: setting
);
setGeneralSettings(updatedSettings);
} catch (error) {
// errors are surfaced by deleteConfigFieldSetting itself (message.error / console.error), so nothing more to do here
}
};
const handleSaveChanges = (router_settings: any) => {
if (!accessToken) {
return;
@ -223,39 +366,55 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
console.log("router_settings", router_settings);
const updatedVariables = Object.fromEntries(
Object.entries(router_settings).map(([key, value]) => {
if (key !== 'routing_strategy_args' && key !== "routing_strategy") {
return [key, (document.querySelector(`input[name="${key}"]`) as HTMLInputElement)?.value || value];
}
else if (key == "routing_strategy") {
return [key, selectedStrategy]
}
else if (key == "routing_strategy_args" && selectedStrategy == "latency-based-routing") {
let setRoutingStrategyArgs: routingStrategyArgs = {}
Object.entries(router_settings)
.map(([key, value]) => {
if (key !== "routing_strategy_args" && key !== "routing_strategy") {
return [
key,
(
document.querySelector(
`input[name="${key}"]`
) as HTMLInputElement
)?.value || value,
];
} else if (key == "routing_strategy") {
return [key, selectedStrategy];
} else if (
key == "routing_strategy_args" &&
selectedStrategy == "latency-based-routing"
) {
let setRoutingStrategyArgs: routingStrategyArgs = {};
const lowestLatencyBufferElement = document.querySelector(`input[name="lowest_latency_buffer"]`) as HTMLInputElement;
const ttlElement = document.querySelector(`input[name="ttl"]`) as HTMLInputElement;
const lowestLatencyBufferElement = document.querySelector(
`input[name="lowest_latency_buffer"]`
) as HTMLInputElement;
const ttlElement = document.querySelector(
`input[name="ttl"]`
) as HTMLInputElement;
if (lowestLatencyBufferElement?.value) {
setRoutingStrategyArgs["lowest_latency_buffer"] = Number(lowestLatencyBufferElement.value)
setRoutingStrategyArgs["lowest_latency_buffer"] = Number(
lowestLatencyBufferElement.value
);
}
if (ttlElement?.value) {
setRoutingStrategyArgs["ttl"] = Number(ttlElement.value)
setRoutingStrategyArgs["ttl"] = Number(ttlElement.value);
}
console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`)
return [
"routing_strategy_args", setRoutingStrategyArgs
]
console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`);
return ["routing_strategy_args", setRoutingStrategyArgs];
}
return null;
}).filter(entry => entry !== null && entry !== undefined) as Iterable<[string, unknown]>
})
.filter((entry) => entry !== null && entry !== undefined) as Iterable<
[string, unknown]
>
);
console.log("updatedVariables", updatedVariables);
const payload = {
router_settings: updatedVariables
router_settings: updatedVariables,
};
try {
@ -267,19 +426,17 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
message.success("router settings updated successfully");
};
if (!accessToken) {
return null;
}
return (
<div className="w-full mx-4">
<TabGroup className="gap-2 p-8 h-[75vh] w-full mt-2">
<TabList variant="line" defaultValue="1">
<Tab value="1">General Settings</Tab>
<Tab value="1">Loadbalancing</Tab>
<Tab value="2">Fallbacks</Tab>
<Tab value="3">General</Tab>
</TabList>
<TabPanels>
<TabPanel>
@ -294,27 +451,55 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
</TableRow>
</TableHead>
<TableBody>
{Object.entries(routerSettings).filter(([param, value]) => param != "fallbacks" && param != "context_window_fallbacks" && param != "routing_strategy_args").map(([param, value]) => (
{Object.entries(routerSettings)
.filter(
([param, value]) =>
param != "fallbacks" &&
param != "context_window_fallbacks" &&
param != "routing_strategy_args"
)
.map(([param, value]) => (
<TableRow key={param}>
<TableCell>
<Text>{param}</Text>
<p style={{fontSize: '0.65rem', color: '#808080', fontStyle: 'italic'}} className="mt-1">{paramExplanation[param]}</p>
<p
style={{
fontSize: "0.65rem",
color: "#808080",
fontStyle: "italic",
}}
className="mt-1"
>
{paramExplanation[param]}
</p>
</TableCell>
<TableCell>
{
param == "routing_strategy" ?
<Select2 defaultValue={value} className="w-full max-w-md" onValueChange={setSelectedStrategy}>
<SelectItem value="usage-based-routing">usage-based-routing</SelectItem>
<SelectItem value="latency-based-routing">latency-based-routing</SelectItem>
<SelectItem value="simple-shuffle">simple-shuffle</SelectItem>
</Select2> :
{param == "routing_strategy" ? (
<Select2
defaultValue={value}
className="w-full max-w-md"
onValueChange={setSelectedStrategy}
>
<SelectItem value="usage-based-routing">
usage-based-routing
</SelectItem>
<SelectItem value="latency-based-routing">
latency-based-routing
</SelectItem>
<SelectItem value="simple-shuffle">
simple-shuffle
</SelectItem>
</Select2>
) : (
<TextInput
name={param}
defaultValue={
typeof value === 'object' ? JSON.stringify(value, null, 2) : value.toString()
typeof value === "object"
? JSON.stringify(value, null, 2)
: value.toString()
}
/>
}
)}
</TableCell>
</TableRow>
))}
@ -323,15 +508,21 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
<AccordionHero
selectedStrategy={selectedStrategy}
strategyArgs={
routerSettings && routerSettings['routing_strategy_args'] && Object.keys(routerSettings['routing_strategy_args']).length > 0
? routerSettings['routing_strategy_args']
routerSettings &&
routerSettings["routing_strategy_args"] &&
Object.keys(routerSettings["routing_strategy_args"])
.length > 0
? routerSettings["routing_strategy_args"]
: defaultLowestLatencyArgs // default value when keys length is 0
}
paramExplanation={paramExplanation}
/>
</Card>
<Col>
<Button className="mt-2" onClick={() => handleSaveChanges(routerSettings)}>
<Button
className="mt-2"
onClick={() => handleSaveChanges(routerSettings)}
>
Save Changes
</Button>
</Col>
@ -347,15 +538,21 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
</TableHead>
<TableBody>
{
routerSettings["fallbacks"] &&
routerSettings["fallbacks"].map((item: Object, index: number) =>
{routerSettings["fallbacks"] &&
routerSettings["fallbacks"].map(
(item: Object, index: number) =>
Object.entries(item).map(([key, value]) => (
<TableRow key={index.toString() + key}>
<TableCell>{key}</TableCell>
<TableCell>{Array.isArray(value) ? value.join(', ') : value}</TableCell>
<TableCell>
<Button onClick={() => testFallbackModelResponse(key, accessToken)}>
{Array.isArray(value) ? value.join(", ") : value}
</TableCell>
<TableCell>
<Button
onClick={() =>
testFallbackModelResponse(key, accessToken)
}
>
Test Fallback
</Button>
</TableCell>
@ -368,11 +565,96 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
</TableCell>
</TableRow>
))
)
}
)}
</TableBody>
</Table>
<AddFallbacks models={modelData?.data ? modelData.data.map((data: any) => data.model_name) : []} accessToken={accessToken} routerSettings={routerSettings} setRouterSettings={setRouterSettings}/>
<AddFallbacks
models={
modelData?.data
? modelData.data.map((data: any) => data.model_name)
: []
}
accessToken={accessToken}
routerSettings={routerSettings}
setRouterSettings={setRouterSettings}
/>
</TabPanel>
<TabPanel>
<Card>
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Setting</TableHeaderCell>
<TableHeaderCell>Value</TableHeaderCell>
<TableHeaderCell>Status</TableHeaderCell>
<TableHeaderCell>Action</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{generalSettings.map((value, index) => (
<TableRow key={index}>
<TableCell>
<Text>{value.field_name}</Text>
<p
style={{
fontSize: "0.65rem",
color: "#808080",
fontStyle: "italic",
}}
className="mt-1"
>
{value.field_description}
</p>
</TableCell>
<TableCell>
{value.field_type == "Integer" ? (
<InputNumber
step={1}
value={value.field_value}
onChange={(newValue) =>
handleInputChange(value.field_name, newValue)
} // Handle value change
/>
) : null}
</TableCell>
<TableCell>
{value.stored_in_db == true ? (
<Badge icon={CheckCircleIcon} className="text-white">
In DB
</Badge>
) : value.stored_in_db == false ? (
<Badge className="text-gray bg-white outline">
In Config
</Badge>
) : (
<Badge className="text-gray bg-white outline">
Not Set
</Badge>
)}
</TableCell>
<TableCell>
<Button
onClick={() =>
handleUpdateField(value.field_name, index)
}
>
Update
</Button>
<Icon
icon={TrashIcon}
color="red"
onClick={() =>
handleResetField(value.field_name, index)
}
>
Reset
</Icon>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
</TabPanel>
</TabPanels>
</TabGroup>
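
Not part of the diff: a minimal, hypothetical usage sketch of the updated component, assuming it is the default export of this file and that the prop values shown are placeholders. When accessToken is null the component renders nothing; on mount it loads the router settings and, via the new getGeneralSettingsCall helper, the editable fields shown in the new "General" tab.

import * as React from "react";
import GeneralSettings from "./general_settings"; // assumed default export of this file

export default function SettingsPage() {
  return (
    <GeneralSettings
      accessToken={"sk-placeholder-admin-key"} // placeholder proxy admin key
      userRole={"Admin"}
      userID={"default_user_id"}
      modelData={{ data: [{ model_name: "gpt-3.5-turbo" }] }} // feeds the AddFallbacks model dropdown
    />
  );
}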

View file

@ -14,15 +14,17 @@ export interface Model {
export const modelCostMap = async () => {
try {
const response = await fetch('https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json');
const response = await fetch(
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
);
const jsonData = await response.json();
console.log(`received data: ${jsonData}`)
return jsonData
console.log(`received data: ${jsonData}`);
return jsonData;
} catch (error) {
console.error("Failed to get model cost map:", error);
throw error;
}
}
};
export const modelCreateCall = async (
accessToken: string,
@ -50,19 +52,21 @@ export const modelCreateCall = async (
const data = await response.json();
console.log("API Response:", data);
message.success("Model created successfully. Wait 60s and refresh on 'All Models' page");
message.success(
"Model created successfully. Wait 60s and refresh on 'All Models' page"
);
return data;
} catch (error) {
console.error("Failed to create key:", error);
throw error;
}
}
};
export const modelDeleteCall = async (
accessToken: string,
model_id: string,
model_id: string
) => {
console.log(`model_id in model delete call: ${model_id}`)
console.log(`model_id in model delete call: ${model_id}`);
try {
const url = proxyBaseUrl ? `${proxyBaseUrl}/model/delete` : `/model/delete`;
const response = await fetch(url, {
@ -72,7 +76,7 @@ export const modelDeleteCall = async (
"Content-Type": "application/json",
},
body: JSON.stringify({
"id": model_id,
id: model_id,
}),
});
@ -91,7 +95,7 @@ export const modelDeleteCall = async (
console.error("Failed to create key:", error);
throw error;
}
}
};
export const keyCreateCall = async (
accessToken: string,
@ -280,8 +284,7 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => {
console.error("Failed to delete key:", error);
throw error;
}
}
};
export const userInfoCall = async (
accessToken: String,
@ -300,7 +303,7 @@ export const userInfoCall = async (
url = `${url}?user_id=${userID}`;
}
console.log("in userInfoCall viewAll=", viewAll);
if (viewAll && page_size && (page != null) && (page != undefined)) {
if (viewAll && page_size && page != null && page != undefined) {
url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
}
//message.info("Requesting user data");
@ -329,10 +332,9 @@ export const userInfoCall = async (
}
};
export const teamInfoCall = async (
accessToken: String,
teamID: String | null,
teamID: String | null
) => {
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/team/info` : `/team/info`;
@ -364,10 +366,7 @@ export const teamInfoCall = async (
}
};
export const getTotalSpendCall = async (
accessToken: String,
) => {
export const getTotalSpendCall = async (accessToken: String) => {
/**
* Get all models on proxy
*/
@ -435,7 +434,6 @@ export const modelInfoCall = async (
}
};
export const modelMetricsCall = async (
accessToken: String,
userID: String,
@ -450,7 +448,7 @@ export const modelMetricsCall = async (
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics` : `/model/metrics`;
if (modelGroup) {
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
}
// message.info("Requesting model data");
const response = await fetch(url, {
@ -476,8 +474,6 @@ export const modelMetricsCall = async (
}
};
export const modelMetricsSlowResponsesCall = async (
accessToken: String,
userID: String,
@ -490,9 +486,11 @@ export const modelMetricsSlowResponsesCall = async (
* Get all models on proxy
*/
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/slow_responses` : `/model/metrics/slow_responses`;
let url = proxyBaseUrl
? `${proxyBaseUrl}/model/metrics/slow_responses`
: `/model/metrics/slow_responses`;
if (modelGroup) {
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
}
// message.info("Requesting model data");
@ -519,7 +517,6 @@ export const modelMetricsSlowResponsesCall = async (
}
};
export const modelExceptionsCall = async (
accessToken: String,
userID: String,
@ -532,10 +529,12 @@ export const modelExceptionsCall = async (
* Get all models on proxy
*/
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics/exceptions` : `/model/metrics/exceptions`;
let url = proxyBaseUrl
? `${proxyBaseUrl}/model/metrics/exceptions`
: `/model/metrics/exceptions`;
if (modelGroup) {
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`
url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
}
const response = await fetch(url, {
method: "GET",
@ -560,7 +559,6 @@ export const modelExceptionsCall = async (
}
};
export const modelAvailableCall = async (
accessToken: String,
userID: String,
@ -625,7 +623,6 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => {
}
};
export const teamSpendLogsCall = async (accessToken: String) => {
try {
const url = proxyBaseUrl
@ -654,7 +651,6 @@ export const teamSpendLogsCall = async (accessToken: String) => {
}
};
export const tagsSpendLogsCall = async (
accessToken: String,
startTime: String | undefined,
@ -666,7 +662,7 @@ export const tagsSpendLogsCall = async (
: `/global/spend/tags`;
if (startTime && endTime) {
url = `${url}?start_date=${startTime}&end_date=${endTime}`
url = `${url}?start_date=${startTime}&end_date=${endTime}`;
}
console.log("in tagsSpendLogsCall:", url);
@ -692,7 +688,6 @@ export const tagsSpendLogsCall = async (
}
};
export const userSpendLogsCall = async (
accessToken: String,
token: String,
@ -806,7 +801,11 @@ export const adminTopEndUsersCall = async (
let body = "";
if (keyToken) {
body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime });
body = JSON.stringify({
api_key: keyToken,
startTime: startTime,
endTime: endTime,
});
} else {
body = JSON.stringify({ startTime: startTime, endTime: endTime });
}
@ -1079,7 +1078,6 @@ export const teamCreateCall = async (
}
};
export const keyUpdateCall = async (
accessToken: string,
formValues: Record<string, any> // Assuming formValues is an object
@ -1347,9 +1345,10 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => {
}
};
export const serviceHealthCheck= async (accessToken: String, service: String) => {
export const serviceHealthCheck = async (
accessToken: String,
service: String
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/health/services?service=${service}`
@ -1373,7 +1372,9 @@ export const serviceHealthCheck= async (accessToken: String, service: String) =>
}
const data = await response.json();
message.success(`Test request to ${service} made - check logs/alerts on ${service} to verify`);
message.success(
`Test request to ${service} made - check logs/alerts on ${service} to verify`
);
// You can add additional logic here based on the response if needed
return data;
} catch (error) {
@ -1382,9 +1383,6 @@ export const serviceHealthCheck= async (accessToken: String, service: String) =>
}
};
export const getCallbacksCall = async (
accessToken: String,
userID: String,
@ -1394,7 +1392,9 @@ export const getCallbacksCall = async (
* Get all the models user has access to
*/
try {
let url = proxyBaseUrl ? `${proxyBaseUrl}/get/config/callbacks` : `/get/config/callbacks`;
let url = proxyBaseUrl
? `${proxyBaseUrl}/get/config/callbacks`
: `/get/config/callbacks`;
//message.info("Requesting model data");
const response = await fetch(url, {
@ -1421,11 +1421,117 @@ export const getCallbacksCall = async (
}
};
export const getGeneralSettingsCall = async (accessToken: String) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/config/list?config_type=general_settings`
: `/config/list?config_type=general_settings`;
//message.info("Requesting model data");
const response = await fetch(url, {
method: "GET",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData, 10);
throw new Error("Network response was not ok");
}
const data = await response.json();
//message.info("Received model data");
return data;
// Handle success - callers can update state or UI based on the returned settings list
} catch (error) {
console.error("Failed to get callbacks:", error);
throw error;
}
};
export const updateConfigFieldSetting = async (
accessToken: String,
fieldName: string,
fieldValue: any
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/config/field/update`
: `/config/field/update`;
let formData = {
field_name: fieldName,
field_value: fieldValue,
config_type: "general_settings",
};
//message.info("Requesting model data");
const response = await fetch(url, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify(formData),
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData, 10);
throw new Error("Network response was not ok");
}
const data = await response.json();
//message.info("Received model data");
message.success("Successfully updated value!");
return data;
// Handle success - callers can update state or UI based on the response
} catch (error) {
console.error("Failed to set callbacks:", error);
throw error;
}
};
export const deleteConfigFieldSetting = async (
accessToken: String,
fieldName: String
) => {
try {
let url = proxyBaseUrl
? `${proxyBaseUrl}/config/field/delete`
: `/config/field/delete`;
let formData = {
field_name: fieldName,
config_type: "general_settings",
};
//message.info("Requesting model data");
const response = await fetch(url, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
},
body: JSON.stringify(formData),
});
if (!response.ok) {
const errorData = await response.text();
message.error(errorData, 10);
throw new Error("Network response was not ok");
}
const data = await response.json();
message.success("Field reset on proxy");
return data;
// Handle success - callers can update state or UI based on the response
} catch (error) {
console.error("Failed to get callbacks:", error);
throw error;
}
};
export const setCallbacksCall = async (
accessToken: String,
formValues: Record<string, any>
@ -1464,9 +1570,7 @@ export const setCallbacksCall = async (
}
};
export const healthCheckCall = async (
accessToken: String,
) => {
export const healthCheckCall = async (accessToken: String) => {
/**
* Get all the models user has access to
*/
@ -1497,6 +1601,3 @@ export const healthCheckCall = async (
throw error;
}
};
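
Not part of the diff: a minimal, hypothetical round-trip sketch showing how the three new helpers map onto the proxy config endpoints. The field name used below is illustrative only, and the token is assumed to be an admin-level key.

import {
  getGeneralSettingsCall,
  updateConfigFieldSetting,
  deleteConfigFieldSetting,
} from "./networking";

export async function generalSettingsRoundTrip(accessToken: string) {
  // GET /config/list?config_type=general_settings -> list of editable general_settings fields
  const settings = await getGeneralSettingsCall(accessToken);
  console.log("editable general_settings fields:", settings);

  // POST /config/field/update -> persist an override for one field in the proxy DB
  // "max_parallel_requests" is an illustrative field name, not taken from this diff
  await updateConfigFieldSetting(accessToken, "max_parallel_requests", 100);

  // POST /config/field/delete -> remove the DB override so the config/default value applies again
  await deleteConfigFieldSetting(accessToken, "max_parallel_requests");
}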