Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-27 11:43:54 +00:00

Merge branch 'main' into litellm_end_user_cost_tracking
Commit 07a1cf39e8
44 changed files with 1983 additions and 681 deletions
@@ -102,12 +102,18 @@ Ollama supported models: https://github.com/ollama/ollama

 | Model Name | Function Call |
 |----------------------|-----------------------------------------------------------------------------------
 | Mistral | `completion(model='ollama/mistral', messages, api_base="http://localhost:11434", stream=True)` |
+| Mistral-7B-Instruct-v0.1 | `completion(model='ollama/mistral-7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mistral-7B-Instruct-v0.2 | `completion(model='ollama/mistral-7B-Instruct-v0.2', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x7B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x7B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
+| Mixtral-8x22B-Instruct-v0.1 | `completion(model='ollama/mixtral-8x22B-Instruct-v0.1', messages, api_base="http://localhost:11434", stream=False)` |
 | Llama2 7B | `completion(model='ollama/llama2', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 13B | `completion(model='ollama/llama2:13b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 70B | `completion(model='ollama/llama2:70b', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
 | Code Llama | `completion(model='ollama/codellama', messages, api_base="http://localhost:11434", stream=True)` |
 | Llama2 Uncensored | `completion(model='ollama/llama2-uncensored', messages, api_base="http://localhost:11434", stream=True)` |
+| Meta LLaMa3 8B | `completion(model='ollama/llama3', messages, api_base="http://localhost:11434", stream=False)` |
+| Meta LLaMa3 70B | `completion(model='ollama/llama3:70b', messages, api_base="http://localhost:11434", stream=False)` |
 | Orca Mini | `completion(model='ollama/orca-mini', messages, api_base="http://localhost:11434", stream=True)` |
 | Vicuna | `completion(model='ollama/vicuna', messages, api_base="http://localhost:11434", stream=True)` |
 | Nous-Hermes | `completion(model='ollama/nous-hermes', messages, api_base="http://localhost:11434", stream=True)` |
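For orientation, a minimal sketch of how one of the rows above is called; it assumes a local `ollama serve` instance on the default port and that the model has already been pulled:

```python
# Minimal sketch: calling a locally served Ollama model through litellm.
# Assumes `ollama serve` is running on localhost:11434 and `llama3` is pulled.
from litellm import completion

response = completion(
    model="ollama/llama3",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    api_base="http://localhost:11434",
    stream=False,
)
print(response.choices[0].message.content)
```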
@@ -15,11 +15,19 @@ from typing import Optional


 class AuthenticationError(openai.AuthenticationError):  # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs
@@ -27,11 +35,19 @@ class AuthenticationError(openai.AuthenticationError):  # type: ignore

 # raise when invalid models passed, example gpt-8
 class NotFoundError(openai.NotFoundError):  # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 404
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs
@@ -39,12 +55,18 @@ class NotFoundError(openai.NotFoundError):  # type: ignore

 class BadRequestError(openai.BadRequestError):  # type: ignore
     def __init__(
-        self, message, model, llm_provider, response: Optional[httpx.Response] = None
+        self,
+        message,
+        model,
+        llm_provider,
+        response: Optional[httpx.Response] = None,
+        litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         response = response or httpx.Response(
             status_code=self.status_code,
             request=httpx.Request(
@@ -57,18 +79,28 @@ class BadRequestError(openai.BadRequestError):  # type: ignore


 class UnprocessableEntityError(openai.UnprocessableEntityError):  # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 422
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs


 class Timeout(openai.APITimeoutError):  # type: ignore
-    def __init__(self, message, model, llm_provider):
+    def __init__(
+        self, message, model, llm_provider, litellm_debug_info: Optional[str] = None
+    ):
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         super().__init__(
             request=request
@@ -77,6 +109,7 @@ class Timeout(openai.APITimeoutError):  # type: ignore
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info

     # custom function to convert to str
     def __str__(self):
@@ -84,22 +117,38 @@ class Timeout(openai.APITimeoutError):  # type: ignore


 class PermissionDeniedError(openai.PermissionDeniedError):  # type:ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 403
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs


 class RateLimitError(openai.RateLimitError):  # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
         self.modle = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs
@@ -107,11 +156,19 @@ class RateLimitError(openai.RateLimitError):  # type: ignore

 # sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors
 class ContextWindowExceededError(BadRequestError):  # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
@@ -122,11 +179,19 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore

 class ContentPolicyViolationError(BadRequestError):  # type: ignore
     # Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             message=self.message,
             model=self.model,  # type: ignore
@@ -136,11 +201,19 @@ class ContentPolicyViolationError(BadRequestError):  # type: ignore


 class ServiceUnavailableError(openai.APIStatusError):  # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 503
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         )  # Call the base class constructor with the parameters it needs
@@ -149,33 +222,51 @@ class ServiceUnavailableError(openai.APIStatusError):  # type: ignore

 # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
 class APIError(openai.APIError):  # type: ignore
     def __init__(
-        self, status_code, message, llm_provider, model, request: httpx.Request
+        self,
+        status_code,
+        message,
+        llm_provider,
+        model,
+        request: httpx.Request,
+        litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = status_code
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(self.message, request=request, body=None)  # type: ignore


 # raised if an invalid request (not get, delete, put, post) is made
 class APIConnectionError(openai.APIConnectionError):  # type: ignore
-    def __init__(self, message, llm_provider, model, request: httpx.Request):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        request: httpx.Request,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         self.status_code = 500
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(message=self.message, request=request)


 # raised if an invalid request (not get, delete, put, post) is made
 class APIResponseValidationError(openai.APIResponseValidationError):  # type: ignore
-    def __init__(self, message, llm_provider, model):
+    def __init__(
+        self, message, llm_provider, model, litellm_debug_info: Optional[str] = None
+    ):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         response = httpx.Response(status_code=500, request=request)
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(response=response, body=None, message=message)
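Every exception class above follows the same pattern: the constructor gains an optional `litellm_debug_info` argument and stores it on the instance. A small sketch of reading that field when a call fails (how the proxy populates the field is not shown in this diff; the failing call below is just an example):

```python
# Sketch: surfacing the new litellm_debug_info field on a failed request.
# The field stays None unless the caller (e.g. the proxy) passes debug info in.
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="sk-invalid",
    )
except litellm.exceptions.AuthenticationError as e:
    print(f"provider={e.llm_provider} status={e.status_code}")
    print(f"debug info: {e.litellm_debug_info}")
```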
@@ -1744,6 +1744,30 @@
         "litellm_provider": "openrouter",
         "mode": "chat"
     },
+    "openrouter/openai/gpt-4o": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-4o-2024-05-13": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
     "openrouter/openai/gpt-4-vision-preview": {
         "max_tokens": 130000,
         "input_cost_per_token": 0.00001,
@@ -2943,6 +2967,24 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama3": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/mistral": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
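Once merged, the new registry entries are visible through litellm's model-info lookup; a quick sketch, where the printed values are the ones declared in the entry above:

```python
# Quick sketch: the new "ollama/llama3" entry as seen by get_model_info().
import litellm

info = litellm.get_model_info("ollama/llama3")
print(info["max_tokens"], info["litellm_provider"], info["mode"])
# Per the entry above: 8192 ollama chat
```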
@@ -2952,6 +2994,42 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral-7B-Instruct-v0.2": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/codellama": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
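These per-token prices are what litellm's cost tracking multiplies against the token counts of each response. A rough sketch of that arithmetic, assuming the registry is loaded into `litellm.model_cost` as usual (the token counts are made-up example numbers):

```python
# Rough sketch of how the prices above translate into a request cost.
# Token counts are illustrative; real counts come from the provider response.
import litellm

entry = litellm.model_cost["openrouter/openai/gpt-4o"]
prompt_tokens, completion_tokens = 1000, 200

cost = (
    prompt_tokens * entry["input_cost_per_token"]
    + completion_tokens * entry["output_cost_per_token"]
)
print(f"${cost:.6f}")  # 1000 * 0.000005 + 200 * 0.000015 = $0.008000
```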
Diffs for three generated files are suppressed because one or more lines are too long. In the two generated LiteLLM Dashboard files whose diffs are shown, the only changes come from a rebuild: the Next.js build ID moves from "obp5wqVSVDMiDTC414cR8" to "2ASoJGxS-D4w-vat00xMy", and the page chunk reference moves from static/chunks/app/page-6a39771cacf75ea6.js (module 7926) to static/chunks/app/page-c35c14c9afd091ec.js (module 4858); the surrounding minified HTML and RSC payload are otherwise identical.
@@ -18,11 +18,6 @@ model_list:
       model: azure/chatgpt-v-2
       api_key: os.environ/AZURE_API_KEY
       api_base: os.environ/AZURE_API_BASE
-      input_cost_per_token: 0.0
-      output_cost_per_token: 0.0
-  - model_name: gpt-3.5-turbo
-    litellm_params:
-      model: gpt-4o
   - model_name: bert-classifier
     litellm_params:
       model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
@@ -34,14 +29,17 @@ router_settings:
   enable_pre_call_checks: true

 litellm_settings:
-  set_verbose: True
-  fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]
+  fallbacks: [{"gpt-3.5-turbo-012": ["azure-gpt-3.5-turbo"]}]
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]

 general_settings:
   enable_jwt_auth: True
+  litellm_jwtauth:
+    team_id_default: "1234"
+    user_id_jwt_field:
+    user_id_upsert: True
   disable_reset_budget: True
   proxy_batch_write_at: 10 # 👈 Frequency of batch writing logs to server (in seconds)
   routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
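For reference, the `fallbacks` entry above has the same shape that `litellm.Router` accepts programmatically. A minimal sketch, where the deployment params are placeholders and only the fallback mapping mirrors this config:

```python
# Minimal sketch: the config's fallback mapping expressed on a litellm Router.
# The litellm_params below are placeholders, not the proxy's real credentials.
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo-012", "litellm_params": {"model": "gpt-3.5-turbo"}},
        {"model_name": "azure-gpt-3.5-turbo", "litellm_params": {"model": "azure/chatgpt-v-2"}},
    ],
    fallbacks=[{"gpt-3.5-turbo-012": ["azure-gpt-3.5-turbo"]}],
)
```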
@@ -1,37 +1,11 @@
-from pydantic import ConfigDict, BaseModel, Field, root_validator, Json, VERSION
-from dataclasses import fields
+from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
 import enum
 from typing import Optional, List, Union, Dict, Literal, Any
 from datetime import datetime
-import uuid
-import json
+import uuid, json, sys, os
 from litellm.types.router import UpdateRouterConfig

-try:
-    from pydantic import model_validator  # type: ignore
-except ImportError:
-    from pydantic import root_validator  # pydantic v1
-
-    def model_validator(mode):  # type: ignore
-        pre = mode == "before"
-        return root_validator(pre=pre)
-
-
-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed)  # type: ignore
-
-    return model_config
-
-
 def hash_token(token: str):
     import hashlib
@@ -61,7 +35,8 @@ class LiteLLMBase(BaseModel):
             # if using pydantic v1
             return self.__fields_set__

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
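This hunk, and most of the ones that follow, replace the shared `get_model_config()` helper with an inline `class Config`. For readers keeping the two spellings straight, a minimal side-by-side sketch (it assumes pydantic v2 is installed, which accepts both forms; the class names are illustrative):

```python
# Minimal sketch of the two equivalent config spellings touched in this file.
from pydantic import BaseModel, ConfigDict


class V2Style(BaseModel):
    # pydantic v2 spelling: configuration lives in `model_config`
    model_config = ConfigDict(extra="allow", protected_namespaces=())


class V1Style(BaseModel):
    # legacy spelling (pydantic v1 style, still accepted by v2): nested Config class
    class Config:
        extra = "allow"
        protected_namespaces = ()
```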
@@ -104,11 +79,6 @@ class LiteLLMRoutes(enum.Enum):
         "/v1/models",
     ]

-    # NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend
-    master_key_only_routes: List = [
-        "/global/spend/reset",
-    ]
-
     info_routes: List = [
         "/key/info",
         "/team/info",

@@ -119,6 +89,11 @@ class LiteLLMRoutes(enum.Enum):
         "/v2/key/info",
     ]

+    # NOTE: ROUTES ONLY FOR MASTER KEY - only the Master Key should be able to Reset Spend
+    master_key_only_routes: List = [
+        "/global/spend/reset",
+    ]
+
     sso_only_routes: List = [
         "/key/generate",
         "/key/update",
@@ -227,13 +202,19 @@ class LiteLLM_JWTAuth(LiteLLMBase):
         "global_spend_tracking_routes",
         "info_routes",
     ]
-    team_jwt_scope: str = "litellm_team"
-    team_id_jwt_field: str = "client_id"
+    team_id_jwt_field: Optional[str] = None
     team_allowed_routes: List[
         Literal["openai_routes", "info_routes", "management_routes"]
     ] = ["openai_routes", "info_routes"]
+    team_id_default: Optional[str] = Field(
+        default=None,
+        description="If no team_id given, default permissions/spend-tracking to this team.s",
+    )
     org_id_jwt_field: Optional[str] = None
     user_id_jwt_field: Optional[str] = None
+    user_id_upsert: bool = Field(
+        default=False, description="If user doesn't exist, upsert them into the db."
+    )
     end_user_id_jwt_field: Optional[str] = None
     public_key_ttl: float = 600
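The `litellm_jwtauth` block added to the proxy config earlier in this diff maps onto these fields. A small sketch of that correspondence (building the object by hand is purely illustrative, and the `user_id_jwt_field` claim name is an assumption since the config above leaves it blank):

```python
# Illustrative only: the proxy normally builds this from config.yaml's
# `litellm_jwtauth` block; constructing it manually just shows the mapping.
from litellm.proxy._types import LiteLLM_JWTAuth

jwt_auth = LiteLLM_JWTAuth(
    team_id_default="1234",   # fallback team when the JWT carries no team id
    user_id_jwt_field="sub",  # assumed claim name; not set in the config above
    user_id_upsert=True,      # create the user row on first sight
)
print(jwt_auth.team_allowed_routes)  # defaults to ["openai_routes", "info_routes"]
```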
@@ -259,7 +240,7 @@ class LiteLLMPromptInjectionParams(LiteLLMBase):
     llm_api_system_prompt: Optional[str] = None
     llm_api_fail_call_string: Optional[str] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_llm_api_params(cls, values):
         llm_api_check = values.get("llm_api_check")
         if llm_api_check is True:

@@ -317,7 +298,8 @@ class ProxyChatCompletionRequest(LiteLLMBase):
     deployment_id: Optional[str] = None
     request_timeout: Optional[int] = None

-    model_config = get_model_config()
+    class Config:
+        extra = "allow"  # allow params not defined here, these fall in litellm.completion(**kwargs)


 class ModelInfoDelete(LiteLLMBase):
@@ -344,9 +326,11 @@ class ModelInfo(LiteLLMBase):
         ]
     ]

-    model_config = get_model_config()
+    class Config:
+        extra = Extra.allow  # Allow extra fields
+        protected_namespaces = ()

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def set_model_info(cls, values):
         if values.get("id") is None:
             values.update({"id": str(uuid.uuid4())})

@@ -372,9 +356,10 @@ class ModelParams(LiteLLMBase):
     litellm_params: dict
     model_info: ModelInfo

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def set_model_info(cls, values):
         if values.get("model_info") is None:
             values.update({"model_info": ModelInfo()})
@@ -410,7 +395,8 @@ class GenerateKeyRequest(GenerateRequestBase):
         {}
     )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class GenerateKeyResponse(GenerateKeyRequest):

@@ -420,7 +406,7 @@ class GenerateKeyResponse(GenerateKeyRequest):
     user_id: Optional[str] = None
     token_id: Optional[str] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def set_model_info(cls, values):
         if values.get("token") is not None:
             values.update({"key": values.get("token")})

@@ -460,7 +446,8 @@ class LiteLLM_ModelTable(LiteLLMBase):
     created_by: str
     updated_by: str

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class NewUserRequest(GenerateKeyRequest):
@@ -488,7 +475,7 @@ class UpdateUserRequest(GenerateRequestBase):
     user_role: Optional[str] = None
     max_budget: Optional[float] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_user_info(cls, values):
         if values.get("user_id") is None and values.get("user_email") is None:
             raise ValueError("Either user id or user email must be provided")

@@ -508,7 +495,7 @@ class NewEndUserRequest(LiteLLMBase):
         None  # if no equivalent model in allowed region - default all requests to this model
     )

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_user_info(cls, values):
         if values.get("max_budget") is not None and values.get("budget_id") is not None:
             raise ValueError("Set either 'max_budget' or 'budget_id', not both.")

@@ -521,7 +508,7 @@ class Member(LiteLLMBase):
     user_id: Optional[str] = None
     user_email: Optional[str] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_user_info(cls, values):
         if values.get("user_id") is None and values.get("user_email") is None:
             raise ValueError("Either user id or user email must be provided")
@@ -546,7 +533,8 @@ class TeamBase(LiteLLMBase):
 class NewTeamRequest(TeamBase):
     model_aliases: Optional[dict] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class GlobalEndUsersSpend(LiteLLMBase):

@@ -565,7 +553,7 @@ class TeamMemberDeleteRequest(LiteLLMBase):
     user_id: Optional[str] = None
     user_email: Optional[str] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_user_info(cls, values):
         if values.get("user_id") is None and values.get("user_email") is None:
             raise ValueError("Either user id or user email must be provided")

@@ -599,9 +587,10 @@ class LiteLLM_TeamTable(TeamBase):
     budget_reset_at: Optional[datetime] = None
     model_id: Optional[int] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def set_model_info(cls, values):
         dict_fields = [
             "metadata",
@@ -637,7 +626,8 @@ class LiteLLM_BudgetTable(LiteLLMBase):
     model_max_budget: Optional[dict] = None
     budget_duration: Optional[str] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class NewOrganizationRequest(LiteLLM_BudgetTable):

@@ -687,7 +677,8 @@ class KeyManagementSettings(LiteLLMBase):
 class TeamDefaultSettings(LiteLLMBase):
     team_id: str

-    model_config = get_model_config()
+    class Config:
+        extra = "allow"  # allow params not defined here, these fall in litellm.completion(**kwargs)


 class DynamoDBArgs(LiteLLMBase):
@@ -711,6 +702,25 @@ class DynamoDBArgs(LiteLLMBase):
     assume_role_aws_session_name: Optional[str] = None


+class ConfigFieldUpdate(LiteLLMBase):
+    field_name: str
+    field_value: Any
+    config_type: Literal["general_settings"]
+
+
+class ConfigFieldDelete(LiteLLMBase):
+    config_type: Literal["general_settings"]
+    field_name: str
+
+
+class ConfigList(LiteLLMBase):
+    field_name: str
+    field_type: str
+    field_description: str
+    field_value: Any
+    stored_in_db: Optional[bool]
+
+
 class ConfigGeneralSettings(LiteLLMBase):
     """
     Documents all the fields supported by `general_settings` in config.yaml
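The three models added above describe payloads for editing `general_settings` at runtime; a small sketch of the update payload the first one validates (which proxy endpoint consumes it is not shown in this diff):

```python
# Illustrative payload for the new ConfigFieldUpdate model; the endpoint that
# accepts it is an assumption, not something this hunk shows.
from litellm.proxy._types import ConfigFieldUpdate

update = ConfigFieldUpdate(
    field_name="max_parallel_requests",
    field_value=100,
    config_type="general_settings",
)
print(update)
```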
@@ -758,7 +768,11 @@ class ConfigGeneralSettings(LiteLLMBase):
         description="override user_api_key_auth with your own auth script - https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth",
     )
     max_parallel_requests: Optional[int] = Field(
-        None, description="maximum parallel requests for each api key"
+        None,
+        description="maximum parallel requests for each api key",
+    )
+    global_max_parallel_requests: Optional[int] = Field(
+        None, description="global max parallel requests to allow for a proxy instance."
     )
     infer_model_from_keys: Optional[bool] = Field(
         None,
@@ -828,7 +842,8 @@ class ConfigYAML(LiteLLMBase):
         description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
     )

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class LiteLLM_VerificationToken(LiteLLMBase):

@@ -862,7 +877,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
     user_id_rate_limits: Optional[dict] = None
     team_id_rate_limits: Optional[dict] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
@@ -892,7 +908,7 @@ class UserAPIKeyAuth(
     user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
     allowed_model_region: Optional[Literal["eu"]] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def check_api_key(cls, values):
         if values.get("api_key") is not None:
             values.update({"token": hash_token(values.get("api_key"))})

@@ -919,7 +935,7 @@ class LiteLLM_UserTable(LiteLLMBase):
     tpm_limit: Optional[int] = None
     rpm_limit: Optional[int] = None

-    @model_validator(mode="before")
+    @root_validator(pre=True)
     def set_model_info(cls, values):
         if values.get("spend") is None:
             values.update({"spend": 0.0})

@@ -927,7 +943,8 @@ class LiteLLM_UserTable(LiteLLMBase):
             values.update({"models": []})
         return values

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


 class LiteLLM_EndUserTable(LiteLLMBase):
@ -939,13 +956,14 @@ class LiteLLM_EndUserTable(LiteLLMBase):
|
||||||
default_model: Optional[str] = None
|
default_model: Optional[str] = None
|
||||||
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
|
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@root_validator(pre=True)
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("spend") is None:
|
if values.get("spend") is None:
|
||||||
values.update({"spend": 0.0})
|
values.update({"spend": 0.0})
|
||||||
return values
|
return values
|
||||||
|
|
||||||
model_config = get_model_config()
|
class Config:
|
||||||
|
protected_namespaces = ()
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_SpendLogs(LiteLLMBase):
|
class LiteLLM_SpendLogs(LiteLLMBase):
|
||||||
|
|
|
@@ -26,7 +26,7 @@ all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes

 def common_checks(
     request_body: dict,
-    team_object: LiteLLM_TeamTable,
+    team_object: Optional[LiteLLM_TeamTable],
     user_object: Optional[LiteLLM_UserTable],
     end_user_object: Optional[LiteLLM_EndUserTable],
     global_proxy_spend: Optional[float],

@@ -45,13 +45,14 @@ def common_checks(
     6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
     """
     _model = request_body.get("model", None)
-    if team_object.blocked == True:
+    if team_object is not None and team_object.blocked == True:
         raise Exception(
             f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
         )
     # 2. If user can call model
     if (
         _model is not None
+        and team_object is not None
         and len(team_object.models) > 0
         and _model not in team_object.models
     ):

@@ -65,7 +66,8 @@ def common_checks(
         )
     # 3. If team is in budget
     if (
-        team_object.max_budget is not None
+        team_object is not None
+        and team_object.max_budget is not None
         and team_object.spend is not None
         and team_object.spend > team_object.max_budget
     ):
@@ -239,6 +241,7 @@ async def get_user_object(
     user_id: str,
     prisma_client: Optional[PrismaClient],
     user_api_key_cache: DualCache,
+    user_id_upsert: bool,
 ) -> Optional[LiteLLM_UserTable]:
     """
     - Check if user id in proxy User Table

@@ -252,7 +255,7 @@ async def get_user_object(
         return None

     # check if in cache
-    cached_user_obj = user_api_key_cache.async_get_cache(key=user_id)
+    cached_user_obj = await user_api_key_cache.async_get_cache(key=user_id)
     if cached_user_obj is not None:
         if isinstance(cached_user_obj, dict):
             return LiteLLM_UserTable(**cached_user_obj)

@@ -260,16 +263,27 @@ async def get_user_object(
             return cached_user_obj
     # else, check db
     try:

        response = await prisma_client.db.litellm_usertable.find_unique(
            where={"user_id": user_id}
        )

        if response is None:
-            raise Exception
+            if user_id_upsert:
+                response = await prisma_client.db.litellm_usertable.create(
+                    data={"user_id": user_id}
+                )
+            else:
+                raise Exception

-        return LiteLLM_UserTable(**response.dict())
-    except Exception as e:  # if end-user not in db
-        raise Exception(
+        _response = LiteLLM_UserTable(**dict(response))
+
+        # save the user object to cache
+        await user_api_key_cache.async_set_cache(key=user_id, value=_response)
+
+        return _response
+    except Exception as e:  # if user not in db
+        raise ValueError(
             f"User doesn't exist in db. 'user_id'={user_id}. Create user via `/user/new` call."
         )
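`get_user_object` above now follows a read-through-cache pattern with an optional upsert. A standalone sketch of the same flow, with plain dicts standing in for the cache and the Prisma client:

```python
# Standalone sketch of the cache -> DB -> optional-upsert lookup used by
# get_user_object above; the cache/db objects are illustrative stand-ins.
import asyncio

_cache: dict = {}
_db: dict = {"existing-user": {"user_id": "existing-user", "spend": 0.0}}

async def get_user(user_id: str, upsert: bool) -> dict:
    if user_id in _cache:          # 1. serve from cache
        return _cache[user_id]
    row = _db.get(user_id)         # 2. fall back to the DB
    if row is None:
        if not upsert:
            raise ValueError(f"User doesn't exist in db. 'user_id'={user_id}")
        row = {"user_id": user_id, "spend": 0.0}
        _db[user_id] = row         # 3. create the row when upsert is enabled
    _cache[user_id] = row          # 4. write back to cache for later calls
    return row

print(asyncio.run(get_user("new-user", upsert=True)))
```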
@@ -290,7 +304,7 @@ async def get_team_object(
         )

     # check if in cache
-    cached_team_obj = user_api_key_cache.async_get_cache(key=team_id)
+    cached_team_obj = await user_api_key_cache.async_get_cache(key=team_id)
     if cached_team_obj is not None:
         if isinstance(cached_team_obj, dict):
             return LiteLLM_TeamTable(**cached_team_obj)

@@ -305,7 +319,11 @@ async def get_team_object(
        if response is None:
            raise Exception

-        return LiteLLM_TeamTable(**response.dict())
+        _response = LiteLLM_TeamTable(**response.dict())
+        # save the team object to cache
+        await user_api_key_cache.async_set_cache(key=response.team_id, value=_response)
+
+        return _response
     except Exception as e:
         raise Exception(
             f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."
@@ -55,12 +55,9 @@ class JWTHandler:
             return True
         return False

-    def is_team(self, scopes: list) -> bool:
-        if self.litellm_jwtauth.team_jwt_scope in scopes:
-            return True
-        return False
-
-    def get_end_user_id(self, token: dict, default_value: Optional[str]) -> str:
+    def get_end_user_id(
+        self, token: dict, default_value: Optional[str]
+    ) -> Optional[str]:
         try:
             if self.litellm_jwtauth.end_user_id_jwt_field is not None:
                 user_id = token[self.litellm_jwtauth.end_user_id_jwt_field]

@@ -70,13 +67,36 @@ class JWTHandler:
                 user_id = default_value
             return user_id

+    def is_required_team_id(self) -> bool:
+        """
+        Returns:
+        - True: if 'team_id_jwt_field' is set
+        - False: if not
+        """
+        if self.litellm_jwtauth.team_id_jwt_field is None:
+            return False
+        return True
+
     def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
         try:
-            team_id = token[self.litellm_jwtauth.team_id_jwt_field]
+            if self.litellm_jwtauth.team_id_jwt_field is not None:
+                team_id = token[self.litellm_jwtauth.team_id_jwt_field]
+            elif self.litellm_jwtauth.team_id_default is not None:
+                team_id = self.litellm_jwtauth.team_id_default
+            else:
+                team_id = None
         except KeyError:
             team_id = default_value
         return team_id

+    def is_upsert_user_id(self) -> bool:
+        """
+        Returns:
+        - True: if 'user_id_upsert' is set
+        - False: if not
+        """
+        return self.litellm_jwtauth.user_id_upsert
+
     def get_user_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
         try:
             if self.litellm_jwtauth.user_id_jwt_field is not None:

@@ -207,12 +227,14 @@ class JWTHandler:
                 raise Exception(f"Validation fails: {str(e)}")
         elif public_key is not None and isinstance(public_key, str):
             try:
-                cert = x509.load_pem_x509_certificate(public_key.encode(), default_backend())
+                cert = x509.load_pem_x509_certificate(
+                    public_key.encode(), default_backend()
+                )

                 # Extract public key
                 key = cert.public_key().public_bytes(
                     serialization.Encoding.PEM,
-                    serialization.PublicFormat.SubjectPublicKeyInfo
+                    serialization.PublicFormat.SubjectPublicKeyInfo,
                 )

                 # decode the token using the public key

@@ -221,7 +243,7 @@ class JWTHandler:
                     key,
                     algorithms=algorithms,
                     audience=audience,
-                    options=decode_options
+                    options=decode_options,
                 )
                 return payload

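`get_team_id` above resolves the team from the configured JWT claim, then a configured default, else `None`; a `KeyError` on the claim falls back to the caller-supplied default. A small self-contained sketch of that rule (claim names here are examples, not litellm defaults):

```python
# Self-contained sketch of the claim-resolution rule in JWTHandler.get_team_id above.
from typing import Optional

def get_team_id(
    token: dict,
    jwt_field: Optional[str],
    team_id_default: Optional[str],
    default_value: Optional[str],
) -> Optional[str]:
    try:
        if jwt_field is not None:
            team_id = token[jwt_field]  # KeyError if the claim is missing
        elif team_id_default is not None:
            team_id = team_id_default
        else:
            team_id = None
    except KeyError:
        team_id = default_value
    return team_id

print(get_team_id({"client_id": "team-1"}, "client_id", None, None))  # team-1
print(get_team_id({}, "client_id", None, "fallback"))                 # fallback
```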
@@ -79,6 +79,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         max_parallel_requests = user_api_key_dict.max_parallel_requests
         if max_parallel_requests is None:
             max_parallel_requests = sys.maxsize
+        global_max_parallel_requests = data.get("metadata", {}).get(
+            "global_max_parallel_requests", None
+        )
         tpm_limit = getattr(user_api_key_dict, "tpm_limit", sys.maxsize)
         if tpm_limit is None:
             tpm_limit = sys.maxsize

@@ -91,6 +94,24 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         # Setup values
         # ------------

+        if global_max_parallel_requests is not None:
+            # get value from cache
+            _key = "global_max_parallel_requests"
+            current_global_requests = await cache.async_get_cache(
+                key=_key, local_only=True
+            )
+            # check if below limit
+            if current_global_requests is None:
+                current_global_requests = 1
+            # if above -> raise error
+            if current_global_requests >= global_max_parallel_requests:
+                raise HTTPException(
+                    status_code=429, detail="Max parallel request limit reached."
+                )
+            # if below -> increment
+            else:
+                await cache.async_increment_cache(key=_key, value=1, local_only=True)
+
         current_date = datetime.now().strftime("%Y-%m-%d")
         current_hour = datetime.now().strftime("%H")
         current_minute = datetime.now().strftime("%M")

@@ -207,6 +228,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
             self.print_verbose(f"INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
+            global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
+                "global_max_parallel_requests", None
+            )
             user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"]
             user_api_key_user_id = kwargs["litellm_params"]["metadata"].get(
                 "user_api_key_user_id", None

@@ -222,6 +246,14 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             # Setup values
             # ------------

+            if global_max_parallel_requests is not None:
+                # get value from cache
+                _key = "global_max_parallel_requests"
+                # decrement
+                await self.user_api_key_cache.async_increment_cache(
+                    key=_key, value=-1, local_only=True
+                )
+
             current_date = datetime.now().strftime("%Y-%m-%d")
             current_hour = datetime.now().strftime("%H")
             current_minute = datetime.now().strftime("%M")

@@ -336,6 +368,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
             self.print_verbose(f"Inside Max Parallel Request Failure Hook")
+            global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
+                "global_max_parallel_requests", None
+            )
             user_api_key = (
                 kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
             )

@@ -347,17 +382,26 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 return

            ## decrement call count if call failed
-            if (
-                hasattr(kwargs["exception"], "status_code")
-                and kwargs["exception"].status_code == 429
-                and "Max parallel request limit reached" in str(kwargs["exception"])
-            ):
+            if "Max parallel request limit reached" in str(kwargs["exception"]):
                 pass  # ignore failed calls due to max limit being reached
             else:
                 # ------------
                 # Setup values
                 # ------------

+                if global_max_parallel_requests is not None:
+                    # get value from cache
+                    _key = "global_max_parallel_requests"
+                    current_global_requests = (
+                        await self.user_api_key_cache.async_get_cache(
+                            key=_key, local_only=True
+                        )
+                    )
+                    # decrement
+                    await self.user_api_key_cache.async_increment_cache(
+                        key=_key, value=-1, local_only=True
+                    )
+
                 current_date = datetime.now().strftime("%Y-%m-%d")
                 current_hour = datetime.now().strftime("%H")
                 current_minute = datetime.now().strftime("%M")
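The hooks above keep a shared in-flight counter under the `global_max_parallel_requests` cache key: reject at the limit on entry, otherwise increment, and decrement again on success or failure. A minimal in-process sketch of the same counter (no litellm objects involved):

```python
# Minimal in-process version of the counter the hooks above maintain in the cache.
counter = {"global_max_parallel_requests": 0}

def enter(limit: int) -> None:
    current = counter["global_max_parallel_requests"]
    if current >= limit:
        raise RuntimeError("Max parallel request limit reached.")  # surfaced as HTTP 429
    counter["global_max_parallel_requests"] = current + 1

def leave() -> None:
    counter["global_max_parallel_requests"] -= 1  # on success or failure

enter(limit=2)
enter(limit=2)
try:
    enter(limit=2)  # third concurrent request is rejected
except RuntimeError as e:
    print(e)
leave()
leave()
```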
@@ -234,6 +234,7 @@ class SpecialModelNames(enum.Enum):
 class CommonProxyErrors(enum.Enum):
     db_not_connected_error = "DB not connected"
     no_llm_router = "No models configured on proxy"
+    not_allowed_access = "Admin-only endpoint. Not allowed to access this."


 @app.exception_handler(ProxyException)

@@ -440,10 +441,13 @@ async def user_api_key_auth(
             # get team id
             team_id = jwt_handler.get_team_id(token=valid_token, default_value=None)

-            if team_id is None:
+            if team_id is None and jwt_handler.is_required_team_id() == True:
                 raise Exception(
                     f"No team id passed in. Field checked in jwt token - '{jwt_handler.litellm_jwtauth.team_id_jwt_field}'"
                 )
+
+            team_object: Optional[LiteLLM_TeamTable] = None
+            if team_id is not None:
                 # check allowed team routes
                 is_allowed = allowed_routes_check(
                     user_role="team",

@@ -481,11 +485,9 @@ async def user_api_key_auth(
                     user_id=user_id,
                     prisma_client=prisma_client,
                     user_api_key_cache=user_api_key_cache,
+                    user_id_upsert=jwt_handler.is_upsert_user_id(),
                 )
-                # save the user object to cache
-                await user_api_key_cache.async_set_cache(
-                    key=user_id, value=user_object
-                )
             # [OPTIONAL] track spend against an external user - `LiteLLM_EndUserTable`
             end_user_object = None
             end_user_id = jwt_handler.get_end_user_id(

@@ -547,18 +549,18 @@ async def user_api_key_auth(
                 global_proxy_spend=global_proxy_spend,
                 route=route,
             )
-            # save team object in cache
-            await user_api_key_cache.async_set_cache(
-                key=team_object.team_id, value=team_object
-            )

             # return UserAPIKeyAuth object
             return UserAPIKeyAuth(
                 api_key=None,
-                team_id=team_object.team_id,
-                team_tpm_limit=team_object.tpm_limit,
-                team_rpm_limit=team_object.rpm_limit,
-                team_models=team_object.models,
+                team_id=team_object.team_id if team_object is not None else None,
+                team_tpm_limit=(
+                    team_object.tpm_limit if team_object is not None else None
+                ),
+                team_rpm_limit=(
+                    team_object.rpm_limit if team_object is not None else None
+                ),
+                team_models=team_object.models if team_object is not None else [],
                 user_role="app_owner",
                 user_id=user_id,
                 org_id=org_id,

@@ -566,9 +568,9 @@ async def user_api_key_auth(
     #### ELSE ####
     if master_key is None:
         if isinstance(api_key, str):
-            return UserAPIKeyAuth(api_key=api_key)
+            return UserAPIKeyAuth(api_key=api_key, user_role="proxy_admin")
         else:
-            return UserAPIKeyAuth()
+            return UserAPIKeyAuth(user_role="proxy_admin")
     elif api_key is None:  # only require api key if master key is set
         raise Exception("No api key passed in.")
     elif api_key == "":

@@ -659,6 +661,7 @@ async def user_api_key_auth(
             verbose_proxy_logger.debug("Token from db: %s", valid_token)
         elif valid_token is not None:
             verbose_proxy_logger.debug("API Key Cache Hit!")
+
         user_id_information = None
         if valid_token:
             # Got Valid Token from Cache, DB

@@ -1187,6 +1190,17 @@ async def user_api_key_auth(
             # No token was found when looking up in the DB
             raise Exception("Invalid token passed")
         if valid_token_dict is not None:
+            if user_id_information is not None and _is_user_proxy_admin(
+                user_id_information
+            ):
+                return UserAPIKeyAuth(
+                    api_key=api_key, user_role="proxy_admin", **valid_token_dict
+                )
+            elif _has_user_setup_sso() and route in LiteLLMRoutes.sso_only_routes.value:
+                return UserAPIKeyAuth(
+                    api_key=api_key, user_role="app_owner", **valid_token_dict
+                )
+            else:
                 return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
         else:
             raise Exception()
@@ -2684,7 +2698,19 @@ class ProxyConfig:
                         "Error setting env variable: %s - %s", k, str(e)
                     )

-        # general_settings
+        # router settings
+        if llm_router is not None and prisma_client is not None:
+            db_router_settings = await prisma_client.db.litellm_config.find_first(
+                where={"param_name": "router_settings"}
+            )
+            if (
+                db_router_settings is not None
+                and db_router_settings.param_value is not None
+            ):
+                _router_settings = db_router_settings.param_value
+                llm_router.update_settings(**_router_settings)
+
+        ## ALERTING ## [TODO] move this to the _update_general_settings() block
         _general_settings = config_data.get("general_settings", {})
         if "alerting" in _general_settings:
             general_settings["alerting"] = _general_settings["alerting"]

@@ -2708,17 +2734,24 @@ class ProxyConfig:
                     alert_to_webhook_url=general_settings["alert_to_webhook_url"]
                 )

-        # router settings
-        if llm_router is not None and prisma_client is not None:
-            db_router_settings = await prisma_client.db.litellm_config.find_first(
-                where={"param_name": "router_settings"}
-            )
-            if (
-                db_router_settings is not None
-                and db_router_settings.param_value is not None
-            ):
-                _router_settings = db_router_settings.param_value
-                llm_router.update_settings(**_router_settings)
+    async def _update_general_settings(self, db_general_settings: Optional[Json]):
+        """
+        Pull from DB, read general settings value
+        """
+        global general_settings
+        if db_general_settings is None:
+            return
+        _general_settings = dict(db_general_settings)
+        ## MAX PARALLEL REQUESTS ##
+        if "max_parallel_requests" in _general_settings:
+            general_settings["max_parallel_requests"] = _general_settings[
+                "max_parallel_requests"
+            ]
+
+        if "global_max_parallel_requests" in _general_settings:
+            general_settings["global_max_parallel_requests"] = _general_settings[
+                "global_max_parallel_requests"
+            ]

     async def add_deployment(
         self,

@@ -2726,7 +2759,7 @@ class ProxyConfig:
         proxy_logging_obj: ProxyLogging,
     ):
         """
-        - Check db for new models (last 10 most recently updated)
+        - Check db for new models
         - Check if model id's in router already
         - If not, add to router
         """

@@ -2739,9 +2772,21 @@ class ProxyConfig:
             )
             verbose_proxy_logger.debug(f"llm_router: {llm_router}")
             new_models = await prisma_client.db.litellm_proxymodeltable.find_many()
+            # update llm router
             await self._update_llm_router(
                 new_models=new_models, proxy_logging_obj=proxy_logging_obj
             )
+
+            db_general_settings = await prisma_client.db.litellm_config.find_first(
+                where={"param_name": "general_settings"}
+            )
+
+            # update general settings
+            if db_general_settings is not None:
+                await self._update_general_settings(
+                    db_general_settings=db_general_settings.param_value,
+                )
+
         except Exception as e:
             verbose_proxy_logger.error(
                 "{}\nTraceback:{}".format(str(e), traceback.format_exc())
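`_update_general_settings` above copies a small whitelist of DB-stored fields into the live `general_settings` dict. A plain-dict sketch of that merge:

```python
# Sketch of the merge performed by _update_general_settings above: only known
# keys are copied from the DB-stored JSON into the live settings.
general_settings = {"max_parallel_requests": 10}  # in-memory (from config.yaml)
db_general_settings = {"global_max_parallel_requests": 100, "unrelated": True}

for key in ("max_parallel_requests", "global_max_parallel_requests"):
    if key in db_general_settings:
        general_settings[key] = db_general_settings[key]

print(general_settings)
# {'max_parallel_requests': 10, 'global_max_parallel_requests': 100}
```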
@@ -2941,27 +2986,6 @@ async def generate_key_helper_fn(
                 data=key_data, table_name="key"
             )
             key_data["token_id"] = getattr(create_key_response, "token", None)
-        elif custom_db_client is not None:
-            if table_name is None or table_name == "user":
-                ## CREATE USER (If necessary)
-                verbose_proxy_logger.debug(
-                    "CustomDBClient: Creating User= %s", user_data
-                )
-                user_row = await custom_db_client.insert_data(
-                    value=user_data, table_name="user"
-                )
-                if user_row is None:
-                    # GET USER ROW
-                    user_row = await custom_db_client.get_data(
-                        key=user_id, table_name="user"  # type: ignore
-                    )
-
-                ## use default user model list if no key-specific model list provided
-                if len(user_row.models) > 0 and len(key_data["models"]) == 0:  # type: ignore
-                    key_data["models"] = user_row.models
-                ## CREATE KEY
-                verbose_proxy_logger.debug("CustomDBClient: Creating Key= %s", key_data)
-                await custom_db_client.insert_data(value=key_data, table_name="key")
     except Exception as e:
         traceback.print_exc()
         if isinstance(e, HTTPException):
@@ -3557,6 +3581,9 @@ async def chat_completion(
         data["metadata"]["user_api_key_alias"] = getattr(
             user_api_key_dict, "key_alias", None
         )
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
         data["metadata"]["user_api_key_org_id"] = user_api_key_dict.org_id
         data["metadata"]["user_api_key_team_id"] = getattr(

@@ -3746,8 +3773,11 @@ async def chat_completion(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
         verbose_proxy_logger.debug(
-            f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
         if user_debug:
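Several endpoints in this diff stamp the configured limit into request metadata so the limiter hooks can read it back from `kwargs`. A sketch of that hand-off:

```python
# Sketch of the metadata hand-off added across the endpoints in this diff: the
# proxy stamps the value into request metadata, and the limiter hooks read it
# back out of kwargs["litellm_params"]["metadata"].
general_settings = {"global_max_parallel_requests": 100}

data = {"metadata": {}}
data["metadata"]["global_max_parallel_requests"] = general_settings.get(
    "global_max_parallel_requests", None
)

kwargs = {"litellm_params": {"metadata": data["metadata"]}}
limit = kwargs["litellm_params"]["metadata"].get("global_max_parallel_requests", None)
print(limit)  # 100
```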
@@ -3795,6 +3825,7 @@ async def completion(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     global user_temperature, user_request_timeout, user_max_tokens, user_api_base
+    data = {}
     check_request_disconnected = None
     try:
         body = await request.body()

@@ -3824,6 +3855,9 @@ async def completion(
         data["metadata"]["user_api_key_team_id"] = getattr(
             user_api_key_dict, "team_id", None
         )
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_team_alias"] = getattr(
             user_api_key_dict, "team_alias", None
         )

@@ -3904,6 +3938,9 @@ async def completion(
         cache_key = hidden_params.get("cache_key", None) or ""
         api_base = hidden_params.get("api_base", None) or ""

+        ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
+
         verbose_proxy_logger.debug("final response: %s", response)
         if (
             "stream" in data and data["stream"] == True

@@ -3934,10 +3971,15 @@ async def completion(
             return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
+        )
         verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
         verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
             e,
+            litellm_debug_info,
         )
         traceback.print_exc()
         error_traceback = traceback.format_exc()
@@ -4015,6 +4057,9 @@ async def embeddings(
         data["metadata"]["user_api_key_alias"] = getattr(
             user_api_key_dict, "key_alias", None
         )
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
         data["metadata"]["user_api_key_team_id"] = getattr(
             user_api_key_dict, "team_id", None

@@ -4140,6 +4185,12 @@ async def embeddings(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
         traceback.print_exc()
         if isinstance(e, HTTPException):
             raise ProxyException(

@@ -4213,6 +4264,9 @@ async def image_generation(
         data["metadata"]["user_api_key_alias"] = getattr(
             user_api_key_dict, "key_alias", None
         )
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
         data["metadata"]["user_api_key_team_id"] = getattr(
             user_api_key_dict, "team_id", None

@@ -4393,6 +4447,9 @@ async def audio_transcriptions(
         data["metadata"]["user_api_key_team_id"] = getattr(
             user_api_key_dict, "team_id", None
         )
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_team_alias"] = getattr(
             user_api_key_dict, "team_alias", None
         )

@@ -4590,6 +4647,9 @@ async def moderations(
             "authorization", None
         )  # do not store the original `sk-..` api key in the db
         data["metadata"]["headers"] = _headers
+        data["metadata"]["global_max_parallel_requests"] = general_settings.get(
+            "global_max_parallel_requests", None
+        )
         data["metadata"]["user_api_key_alias"] = getattr(
             user_api_key_dict, "key_alias", None
         )
@@ -7697,7 +7757,12 @@ async def new_organization(

     If none provided, create one based on provided values
     """
-    budget_row = LiteLLM_BudgetTable(**data.json(exclude_none=True))
+    budget_params = LiteLLM_BudgetTable.model_fields.keys()
+
+    # Only include Budget Params when creating an entry in litellm_budgettable
+    _json_data = data.json(exclude_none=True)
+    _budget_data = {k: v for k, v in _json_data.items() if k in budget_params}
+    budget_row = LiteLLM_BudgetTable(**_budget_data)

     new_budget = prisma_client.jsonify_object(budget_row.json(exclude_none=True))


@@ -9288,7 +9353,7 @@ async def auth_callback(request: Request):
     return RedirectResponse(url=litellm_dashboard_ui)


-#### BASIC ENDPOINTS ####
+#### CONFIG MANAGEMENT ####
 @router.post(
     "/config/update",
     tags=["config.yaml"],
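`new_organization` above now filters the request body down to the fields `LiteLLM_BudgetTable` actually declares before building the budget row. A generic sketch of the same filtering with a stand-in model (it mirrors the `model_fields` call used in the diff, which is pydantic v2 API):

```python
# Generic version of the filtering done in new_organization above: keep only
# the keys the target model declares.
from typing import Optional
from pydantic import BaseModel

class BudgetTable(BaseModel):  # illustrative stand-in for LiteLLM_BudgetTable
    max_budget: Optional[float] = None
    tpm_limit: Optional[int] = None

request_body = {"max_budget": 50.0, "organization_alias": "my-org"}  # mixed fields
budget_params = BudgetTable.model_fields.keys()
budget_data = {k: v for k, v in request_body.items() if k in budget_params}
print(BudgetTable(**budget_data))  # organization_alias is dropped
```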
@@ -9424,6 +9489,299 @@ async def update_config(config_info: ConfigYAML):
         )


+### CONFIG GENERAL SETTINGS
+"""
+- Update config settings
+- Get config settings
+
+Keep it more precise, to prevent overwrite other values unintentially
+"""
+
+
+@router.post(
+    "/config/field/update",
+    tags=["config.yaml"],
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def update_config_general_settings(
+    data: ConfigFieldUpdate,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Update a specific field in litellm general settings
+    """
+    global prisma_client
+    ## VALIDATION ##
+    """
+    - Check if prisma_client is None
+    - Check if user allowed to call this endpoint (admin-only)
+    - Check if param in general settings
+    - Check if config value is valid type
+    """
+
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    if user_api_key_dict.user_role != "proxy_admin":
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.not_allowed_access.value},
+        )
+
+    if data.field_name not in ConfigGeneralSettings.model_fields:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "Invalid field={} passed in.".format(data.field_name)},
+        )
+
+    try:
+        cgs = ConfigGeneralSettings(**{data.field_name: data.field_value})
+    except:
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "Invalid type of field value={} passed in.".format(
+                    type(data.field_value),
+                )
+            },
+        )
+
+    ## get general settings from db
+    db_general_settings = await prisma_client.db.litellm_config.find_first(
+        where={"param_name": "general_settings"}
+    )
+    ### update value
+
+    if db_general_settings is None or db_general_settings.param_value is None:
+        general_settings = {}
+    else:
+        general_settings = dict(db_general_settings.param_value)
+
+    ## update db
+
+    general_settings[data.field_name] = data.field_value
+
+    response = await prisma_client.db.litellm_config.upsert(
+        where={"param_name": "general_settings"},
+        data={
+            "create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)},  # type: ignore
+            "update": {"param_value": json.dumps(general_settings)},  # type: ignore
+        },
+    )
+
+    return response
+
+
+@router.get(
+    "/config/field/info",
+    tags=["config.yaml"],
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def get_config_general_settings(
+    field_name: str,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    global prisma_client
+
+    ## VALIDATION ##
+    """
+    - Check if prisma_client is None
+    - Check if user allowed to call this endpoint (admin-only)
+    - Check if param in general settings
+    """
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    if user_api_key_dict.user_role != "proxy_admin":
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.not_allowed_access.value},
+        )
+
+    if field_name not in ConfigGeneralSettings.model_fields:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "Invalid field={} passed in.".format(field_name)},
+        )
+
+    ## get general settings from db
+    db_general_settings = await prisma_client.db.litellm_config.find_first(
+        where={"param_name": "general_settings"}
+    )
+    ### pop the value
+
+    if db_general_settings is None or db_general_settings.param_value is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "Field name={} not in DB".format(field_name)},
+        )
+    else:
+        general_settings = dict(db_general_settings.param_value)
+
+        if field_name in general_settings:
+            return {
+                "field_name": field_name,
+                "field_value": general_settings[field_name],
+            }
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": "Field name={} not in DB".format(field_name)},
+            )
+
+
+@router.get(
+    "/config/list",
+    tags=["config.yaml"],
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def get_config_list(
+    config_type: Literal["general_settings"],
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+) -> List[ConfigList]:
+    """
+    List the available fields + current values for a given type of setting (currently just 'general_settings')
+    """
+    global prisma_client, general_settings
+
+    ## VALIDATION ##
+    """
+    - Check if prisma_client is None
+    - Check if user allowed to call this endpoint (admin-only)
+    - Check if param in general settings
+    """
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    if user_api_key_dict.user_role != "proxy_admin":
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "{}, your role={}".format(
+                    CommonProxyErrors.not_allowed_access.value,
+                    user_api_key_dict.user_role,
+                )
+            },
+        )
+
+    ## get general settings from db
+    db_general_settings = await prisma_client.db.litellm_config.find_first(
+        where={"param_name": "general_settings"}
+    )
+
+    if db_general_settings is not None and db_general_settings.param_value is not None:
+        db_general_settings_dict = dict(db_general_settings.param_value)
+    else:
+        db_general_settings_dict = {}
+
+    allowed_args = {
+        "max_parallel_requests": {"type": "Integer"},
+        "global_max_parallel_requests": {"type": "Integer"},
+    }
+
+    return_val = []
+
+    for field_name, field_info in ConfigGeneralSettings.model_fields.items():
+        if field_name in allowed_args:
+
+            _stored_in_db = None
+            if field_name in db_general_settings_dict:
+                _stored_in_db = True
+            elif field_name in general_settings:
+                _stored_in_db = False
+
+            _response_obj = ConfigList(
+                field_name=field_name,
+                field_type=allowed_args[field_name]["type"],
+                field_description=field_info.description or "",
+                field_value=general_settings.get(field_name, None),
+                stored_in_db=_stored_in_db,
+            )
+            return_val.append(_response_obj)
+
+    return return_val
+
+
+@router.post(
+    "/config/field/delete",
+    tags=["config.yaml"],
+    dependencies=[Depends(user_api_key_auth)],
+)
+async def delete_config_general_settings(
+    data: ConfigFieldDelete,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Delete the db value of this field in litellm general settings. Resets it to it's initial default value on litellm.
+    """
+    global prisma_client
+    ## VALIDATION ##
+    """
+    - Check if prisma_client is None
+    - Check if user allowed to call this endpoint (admin-only)
+    - Check if param in general settings
+    """
+    if prisma_client is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": CommonProxyErrors.db_not_connected_error.value},
+        )
+
+    if user_api_key_dict.user_role != "proxy_admin":
+        raise HTTPException(
+            status_code=400,
+            detail={
+                "error": "{}, your role={}".format(
+                    CommonProxyErrors.not_allowed_access.value,
+                    user_api_key_dict.user_role,
+                )
+            },
+        )
+
+    if data.field_name not in ConfigGeneralSettings.model_fields:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "Invalid field={} passed in.".format(data.field_name)},
+        )
+
+    ## get general settings from db
+    db_general_settings = await prisma_client.db.litellm_config.find_first(
+        where={"param_name": "general_settings"}
+    )
+    ### pop the value
+
+    if db_general_settings is None or db_general_settings.param_value is None:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": "Field name={} not in config".format(data.field_name)},
+        )
+    else:
+        general_settings = dict(db_general_settings.param_value)
+
+    ## update db
+
+    general_settings.pop(data.field_name, None)
+
+    response = await prisma_client.db.litellm_config.upsert(
+        where={"param_name": "general_settings"},
+        data={
+            "create": {"param_name": "general_settings", "param_value": json.dumps(general_settings)},  # type: ignore
+            "update": {"param_value": json.dumps(general_settings)},  # type: ignore
+        },
+    )
+
+    return response
+
+
 @router.get(
     "/get/config/callbacks",
     tags=["config.yaml"],
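A usage sketch for the admin-only config endpoints added above, against a locally running proxy; the base URL and key are placeholders, and an admin-role key is required:

```python
# Usage sketch for the /config/field/* and /config/list endpoints added above.
# Base URL and admin key are placeholders.
import requests

BASE = "http://localhost:4000"                      # assumed proxy address
HEADERS = {"Authorization": "Bearer sk-admin-key"}  # placeholder admin key

# set a field in general_settings (persisted to the DB)
requests.post(
    f"{BASE}/config/field/update",
    headers=HEADERS,
    json={
        "field_name": "global_max_parallel_requests",
        "field_value": 100,
        "config_type": "general_settings",
    },
)

# read it back, list supported fields, then reset it to the default
print(requests.get(f"{BASE}/config/field/info", headers=HEADERS,
                   params={"field_name": "global_max_parallel_requests"}).json())
print(requests.get(f"{BASE}/config/list", headers=HEADERS,
                   params={"config_type": "general_settings"}).json())
requests.post(f"{BASE}/config/field/delete", headers=HEADERS,
              json={"field_name": "global_max_parallel_requests",
                    "config_type": "general_settings"})
```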
@@ -9591,6 +9949,7 @@ async def config_yaml_endpoint(config_info: ConfigYAML):
     return {"hello": "world"}


+#### BASIC ENDPOINTS ####
 @router.get(
     "/test",
     tags=["health"],
@@ -252,8 +252,8 @@ class ProxyLogging:
         """
         Runs the CustomLogger's async_moderation_hook()
         """
-        for callback in litellm.callbacks:
-            new_data = copy.deepcopy(data)
+        new_data = copy.deepcopy(data)
+        for callback in litellm.callbacks:
             try:
                 if isinstance(callback, CustomLogger):
                     await callback.async_moderation_hook(

@@ -418,9 +418,14 @@ class ProxyLogging:

         Related issue - https://github.com/BerriAI/litellm/issues/3395
         """
+        litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
+        exception_str = str(original_exception)
+        if litellm_debug_info is not None:
+            exception_str += litellm_debug_info
+
         asyncio.create_task(
             self.alerting_handler(
-                message=f"LLM API call failed: {str(original_exception)}",
+                message=f"LLM API call failed: {exception_str}",
                 level="High",
                 alert_type="llm_exceptions",
                 request_data=request_data,
@@ -262,13 +262,22 @@ class Router:

         self.retry_after = retry_after
         self.routing_strategy = routing_strategy
-        self.fallbacks = fallbacks or litellm.fallbacks
+
+        ## SETTING FALLBACKS ##
+        ### validate if it's set + in correct format
+        _fallbacks = fallbacks or litellm.fallbacks
+
+        self.validate_fallbacks(fallback_param=_fallbacks)
+        ### set fallbacks
+        self.fallbacks = _fallbacks
+
         if default_fallbacks is not None or litellm.default_fallbacks is not None:
             _fallbacks = default_fallbacks or litellm.default_fallbacks
             if self.fallbacks is not None:
                 self.fallbacks.append({"*": _fallbacks})
             else:
                 self.fallbacks = [{"*": _fallbacks}]

         self.context_window_fallbacks = (
             context_window_fallbacks or litellm.context_window_fallbacks
         )
@@ -336,6 +345,21 @@ class Router:
         if self.alerting_config is not None:
             self._initialize_alerting()

+    def validate_fallbacks(self, fallback_param: Optional[List]):
+        if fallback_param is None:
+            return
+        if len(fallback_param) > 0:  # if set
+            ## for dictionary in list, check if only 1 key in dict
+            for _dict in fallback_param:
+                assert isinstance(_dict, dict), "Item={}, not a dictionary".format(
+                    _dict
+                )
+                assert (
+                    len(_dict.keys()) == 1
+                ), "Only 1 key allows in dictionary. You set={} for dict={}".format(
+                    len(_dict.keys()), _dict
+                )
+
     def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
         if routing_strategy == "least-busy":
             self.leastbusy_logger = LeastBusyLoggingHandler(
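`validate_fallbacks` above accepts a list of single-key dicts. An example of the accepted shape (the model names are placeholders):

```python
# Shape accepted by Router.validate_fallbacks above: a list of single-key dicts
# mapping a model (or "*") to its fallback deployments. Model names are examples.
fallbacks = [
    {"gpt-3.5-turbo": ["gpt-4"]},   # per-model fallback
    {"*": ["claude-3-haiku"]},      # wildcard default fallback
]

for _dict in fallbacks:
    assert isinstance(_dict, dict)
    assert len(_dict.keys()) == 1   # more than one key per dict fails validation
print("valid fallback config")
```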
@@ -1962,6 +1986,45 @@ class Router:
                 key=rpm_key, value=request_count, local_only=True
             )  # don't change existing ttl

+    def _is_cooldown_required(self, exception_status: Union[str, int]):
+        """
+        A function to determine if a cooldown is required based on the exception status.
+
+        Parameters:
+            exception_status (Union[str, int]): The status of the exception.
+
+        Returns:
+            bool: True if a cooldown is required, False otherwise.
+        """
+        try:
+
+            if isinstance(exception_status, str):
+                exception_status = int(exception_status)
+
+            if exception_status >= 400 and exception_status < 500:
+                if exception_status == 429:
+                    # Cool down 429 Rate Limit Errors
+                    return True
+
+                elif exception_status == 401:
+                    # Cool down 401 Auth Errors
+                    return True
+
+                elif exception_status == 408:
+                    return True
+
+                else:
+                    # Do NOT cool down all other 4XX Errors
+                    return False
+
+            else:
+                # should cool down for all other errors
+                return True
+
+        except:
+            # Catch all - if any exceptions default to cooling down
+            return True
+
     def _set_cooldown_deployments(
         self, exception_status: Union[str, int], deployment: Optional[str] = None
     ):
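`_is_cooldown_required` above cools a deployment down on 429, 401, 408 and any non-4xx status, but not on other 4xx errors. A small check spelling the rule out per status code:

```python
# Expected behaviour of Router._is_cooldown_required above, per status code.
expected = {429: True, 401: True, 408: True, 400: False, 404: False, 500: True, "503": True}

def is_cooldown_required(status) -> bool:
    try:
        status = int(status)
        if 400 <= status < 500:
            return status in (429, 401, 408)
        return True
    except Exception:
        return True  # default to cooling down on anything unexpected

assert all(is_cooldown_required(s) == want for s, want in expected.items())
print("cooldown rules verified")
```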
@@ -1975,6 +2038,9 @@ class Router:
         if deployment is None:
             return

+        if self._is_cooldown_required(exception_status=exception_status) == False:
+            return
+
         dt = get_utc_datetime()
         current_minute = dt.strftime("%H-%M")
         # get current fails for deployment
@@ -27,7 +27,7 @@ class LiteLLMBase(BaseModel):


 class RoutingArgs(LiteLLMBase):
-    ttl: int = 1 * 60 * 60  # 1 hour
+    ttl: float = 1 * 60 * 60  # 1 hour
     lowest_latency_buffer: float = 0
     max_latency_list_size: int = 10

@@ -376,9 +376,8 @@ def test_vertex_ai_stream():
         print("making request", model)
         response = completion(
             model=model,
-            messages=[
-                {"role": "user", "content": "write 10 line code code for saying hi"}
-            ],
+            messages=[{"role": "user", "content": "hello tell me a short story"}],
+            max_tokens=15,
             stream=True,
         )
         completed_str = ""
@ -38,7 +38,7 @@ def reset_callbacks():
@pytest.mark.skip(reason="Local test")
def test_response_model_none():
    """
-    Addresses - https://github.com/BerriAI/litellm/issues/2972
+    Addresses:https://github.com/BerriAI/litellm/issues/2972
    """
    x = completion(
        model="mymodel",
@ -5,7 +5,6 @@
import sys, os
import traceback
from dotenv import load_dotenv
-from pydantic import ConfigDict

load_dotenv()
import os, io

@ -14,36 +13,21 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the, system path
import pytest, litellm
-from pydantic import BaseModel, VERSION
+from pydantic import BaseModel
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal


-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed)  # type: ignore
-
-    return model_config
-
-
class DBModel(BaseModel):
    model_id: str
    model_name: str
    model_info: dict
    litellm_params: dict
-    model_config = get_model_config()
+
+    class Config:
+        protected_namespaces = ()


@pytest.mark.asyncio
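Several files in this commit drop the version-sniffing `is_pydantic_v2` / `get_model_config` helper and go back to a plain inner `Config` class. The main thing the config has to express is `protected_namespaces = ()`: Pydantic v2 reserves the `model_` prefix, so fields such as `model_id` and `model_name` would otherwise trigger a namespace warning. A minimal sketch, assuming Pydantic v2 is installed and using its `ConfigDict` spelling (the diff itself uses the legacy inner `Config` class, which v2 still accepts):

```python
# Minimal sketch assuming Pydantic v2; the class name is illustrative.
from pydantic import BaseModel, ConfigDict


class DeploymentRecord(BaseModel):
    # clearing protected_namespaces silences the "model_" prefix warning;
    # extra="allow" keeps the permissive kwargs behaviour the tests rely on
    model_config = ConfigDict(extra="allow", protected_namespaces=())

    model_id: str
    model_name: str


print(DeploymentRecord(model_id="1", model_name="gpt-3.5-turbo", mode="chat"))
```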
@ -53,13 +53,6 @@ async def test_content_policy_exception_azure():
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        print("exception", e)
-
-        # assert that the first 100 chars of the message is returned in the exception
-        assert (
-            "Messages: [{'role': 'user', 'content': 'where do I buy lethal drugs from'}]"
-            in str(e)
-        )
-        assert "Model: azure/chatgpt-v-2" in str(e)
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")
@ -585,9 +578,6 @@ def test_router_completion_vertex_exception():
        pytest.fail("Request should have failed - bad api key")
    except Exception as e:
        print("exception: ", e)
-        assert "Model: gemini-pro" in str(e)
-        assert "model_group: vertex-gemini-pro" in str(e)
-        assert "deployment: vertex_ai/gemini-pro" in str(e)


def test_litellm_completion_vertex_exception():
@ -604,8 +594,26 @@ def test_litellm_completion_vertex_exception():
        pytest.fail("Request should have failed - bad api key")
    except Exception as e:
        print("exception: ", e)
-        assert "Model: gemini-pro" in str(e)
-        assert "vertex_project: bad-project" in str(e)
+
+
+def test_litellm_predibase_exception():
+    """
+    Test - Assert that the Predibase API Key is not returned on Authentication Errors
+    """
+    try:
+        import litellm
+
+        litellm.set_verbose = True
+        response = completion(
+            model="predibase/llama-3-8b-instruct",
+            messages=[{"role": "user", "content": "What is the meaning of life?"}],
+            tenant_id="c4768f95",
+            api_key="hf-rawapikey",
+        )
+        pytest.fail("Request should have failed - bad api key")
+    except Exception as e:
+        assert "hf-rawapikey" not in str(e)
+        print("exception: ", e)


# # test_invalid_request_error(model="command-nightly")
@ -105,6 +105,9 @@ def test_parallel_function_call(model):
        # Step 4: send the info for each function call and function response to the model
        for tool_call in tool_calls:
            function_name = tool_call.function.name
+            if function_name not in available_functions:
+                # the model called a function that does not exist in available_functions - don't try calling anything
+                return
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(

@ -124,7 +127,6 @@ def test_parallel_function_call(model):
            model=model, messages=messages, temperature=0.2, seed=22
        )  # get a new response from the model where it can see the function response
        print("second response\n", second_response)
-        return second_response
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@ -1,7 +1,7 @@
#### What this tests ####
# Unit tests for JWT-Auth

-import sys, os, asyncio, time, random
+import sys, os, asyncio, time, random, uuid
import traceback
from dotenv import load_dotenv

@ -24,6 +24,7 @@ public_key = {
    "alg": "RS256",
}

+
def test_load_config_with_custom_role_names():
    config = {
        "general_settings": {

@ -77,7 +78,8 @@ async def test_token_single_public_key():
        == "qIgOQfEVrrErJC0E7gsHXi6rs_V0nyFY5qPFui2-tv0o4CwpwDzgfBtLO7o_wLiguq0lnu54sMT2eLNoRiiPuLvv6bg7Iy1H9yc5_4Jf5oYEOrqN5o9ZBOoYp1q68Pv0oNJYyZdGu5ZJfd7V4y953vB2XfEKgXCsAkhVhlvIUMiDNKWoMDWsyb2xela5tRURZ2mJAXcHfSC_sYdZxIA2YYrIHfoevq_vTlaz0qVSe_uOKjEpgOAS08UUrgda4CQL11nzICiIQzc6qmjIQt2cjzB2D_9zb4BYndzEtfl0kwAT0z_I85S3mkwTqHU-1BvKe_4MG4VG3dAAeffLPXJyXQ"
    )

-@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
+
+@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_valid_invalid_token(audience):
    """
@ -90,7 +92,7 @@ async def test_valid_invalid_token(audience):
    from cryptography.hazmat.primitives.asymmetric import rsa
    from cryptography.hazmat.backends import default_backend

-    os.environ.pop('JWT_AUDIENCE', None)
+    os.environ.pop("JWT_AUDIENCE", None)
    if audience:
        os.environ["JWT_AUDIENCE"] = audience

@ -138,7 +140,7 @@ async def test_valid_invalid_token(audience):
        "sub": "user123",
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm-proxy-admin",
-        "aud": audience
+        "aud": audience,
    }

    # Generate the JWT token

@ -166,7 +168,7 @@ async def test_valid_invalid_token(audience):
        "sub": "user123",
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm-NO-SCOPE",
-        "aud": audience
+        "aud": audience,
    }

    # Generate the JWT token

@ -183,6 +185,7 @@ async def test_valid_invalid_token(audience):
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")

+
@pytest.fixture
def prisma_client():
    import litellm

@ -205,7 +208,7 @@ def prisma_client():
    return prisma_client


-@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
+@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio
async def test_team_token_output(prisma_client, audience):
    import jwt, json

@ -222,7 +225,7 @@ async def test_team_token_output(prisma_client, audience):
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    await litellm.proxy.proxy_server.prisma_client.connect()

-    os.environ.pop('JWT_AUDIENCE', None)
+    os.environ.pop("JWT_AUDIENCE", None)
    if audience:
        os.environ["JWT_AUDIENCE"] = audience

@ -261,7 +264,7 @@ async def test_team_token_output(prisma_client, audience):

    jwt_handler.user_api_key_cache = cache

-    jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth()
+    jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth(team_id_jwt_field="client_id")

    # VALID TOKEN
    ## GENERATE A TOKEN

@ -274,7 +277,7 @@ async def test_team_token_output(prisma_client, audience):
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm_team",
        "client_id": team_id,
-        "aud": audience
+        "aud": audience,
    }

    # Generate the JWT token

@ -289,7 +292,7 @@ async def test_team_token_output(prisma_client, audience):
        "sub": "user123",
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm_proxy_admin",
-        "aud": audience
+        "aud": audience,
    }

    admin_token = jwt.encode(payload, private_key_str, algorithm="RS256")

@ -315,7 +318,13 @@ async def test_team_token_output(prisma_client, audience):

    ## 1. INITIAL TEAM CALL - should fail
    # use generated key to auth in
-    setattr(litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True})
+    setattr(
+        litellm.proxy.proxy_server,
+        "general_settings",
+        {
+            "enable_jwt_auth": True,
+        },
+    )
    setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler)
    try:
        result = await user_api_key_auth(request=request, api_key=bearer_token)

@ -358,9 +367,22 @@ async def test_team_token_output(prisma_client, audience):
    assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"]


-@pytest.mark.parametrize('audience', [None, "litellm-proxy"])
+@pytest.mark.parametrize("audience", [None, "litellm-proxy"])
+@pytest.mark.parametrize(
+    "team_id_set, default_team_id",
+    [(True, False), (False, True)],
+)
+@pytest.mark.parametrize("user_id_upsert", [True, False])
@pytest.mark.asyncio
-async def test_user_token_output(prisma_client, audience):
+async def test_user_token_output(
+    prisma_client, audience, team_id_set, default_team_id, user_id_upsert
+):
+    import uuid
+
+    args = locals()
+    print(f"received args - {args}")
+    if default_team_id:
+        default_team_id = "team_id_12344_{}".format(uuid.uuid4())
    """
    - If user required, check if it exists
    - fail initial request (when user doesn't exist)

@ -373,7 +395,12 @@ async def test_user_token_output(prisma_client, audience):
    from cryptography.hazmat.backends import default_backend
    from fastapi import Request
    from starlette.datastructures import URL
-    from litellm.proxy.proxy_server import user_api_key_auth, new_team, new_user
+    from litellm.proxy.proxy_server import (
+        user_api_key_auth,
+        new_team,
+        new_user,
+        user_info,
+    )
    from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth, NewUserRequest
    import litellm
    import uuid

@ -381,7 +408,7 @@ async def test_user_token_output(prisma_client, audience):
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    await litellm.proxy.proxy_server.prisma_client.connect()

-    os.environ.pop('JWT_AUDIENCE', None)
+    os.environ.pop("JWT_AUDIENCE", None)
    if audience:
        os.environ["JWT_AUDIENCE"] = audience

@ -423,6 +450,11 @@ async def test_user_token_output(prisma_client, audience):
    jwt_handler.litellm_jwtauth = LiteLLM_JWTAuth()

    jwt_handler.litellm_jwtauth.user_id_jwt_field = "sub"
+    jwt_handler.litellm_jwtauth.team_id_default = default_team_id
+    jwt_handler.litellm_jwtauth.user_id_upsert = user_id_upsert
+
+    if team_id_set:
+        jwt_handler.litellm_jwtauth.team_id_jwt_field = "client_id"

    # VALID TOKEN
    ## GENERATE A TOKEN

@ -436,7 +468,7 @@ async def test_user_token_output(prisma_client, audience):
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm_team",
        "client_id": team_id,
-        "aud": audience
+        "aud": audience,
    }

    # Generate the JWT token

@ -451,7 +483,7 @@ async def test_user_token_output(prisma_client, audience):
        "sub": user_id,
        "exp": expiration_time,  # set the token to expire in 10 minutes
        "scope": "litellm_proxy_admin",
-        "aud": audience
+        "aud": audience,
    }

    admin_token = jwt.encode(payload, private_key_str, algorithm="RS256")

@ -503,6 +535,16 @@ async def test_user_token_output(prisma_client, audience):
            ),
            user_api_key_dict=result,
        )
+        if default_team_id:
+            await new_team(
+                data=NewTeamRequest(
+                    team_id=default_team_id,
+                    tpm_limit=100,
+                    rpm_limit=99,
+                    models=["gpt-3.5-turbo", "gpt-4"],
+                ),
+                user_api_key_dict=result,
+            )
    except Exception as e:
        pytest.fail(f"This should not fail - {str(e)}")

@ -513,11 +555,23 @@ async def test_user_token_output(prisma_client, audience):
        team_result: UserAPIKeyAuth = await user_api_key_auth(
            request=request, api_key=bearer_token
        )
+        if user_id_upsert == False:
            pytest.fail(f"User doesn't exist. this should fail")
    except Exception as e:
        pass

    ## 4. Create user
+    if user_id_upsert:
+        ## check if user already exists
+        try:
+            bearer_token = "Bearer " + admin_token
+
+            request._url = URL(url="/team/new")
+            result = await user_api_key_auth(request=request, api_key=bearer_token)
+            await user_info(user_id=user_id)
+        except Exception as e:
+            pytest.fail(f"This should not fail - {str(e)}")
+    else:
    try:
        bearer_token = "Bearer " + admin_token

@ -543,6 +597,7 @@ async def test_user_token_output(prisma_client, audience):

    ## 6. ASSERT USER_API_KEY_AUTH format (used for tpm/rpm limiting in parallel_request_limiter.py AND cost tracking)

+    if team_id_set or default_team_id is not None:
        assert team_result.team_tpm_limit == 100
        assert team_result.team_rpm_limit == 99
        assert team_result.team_models == ["gpt-3.5-turbo", "gpt-4"]
@ -705,7 +705,7 @@ async def test_lowest_latency_routing_first_pick():
    )  # type: ignore

    deployments = {}
-    for _ in range(5):
+    for _ in range(10):
        response = await router.acompletion(
            model="azure-model", messages=[{"role": "user", "content": "hello"}]
        )
@ -28,6 +28,37 @@ from datetime import datetime
## On Request failure


+@pytest.mark.asyncio
+async def test_global_max_parallel_requests():
+    """
+    Test if ParallelRequestHandler respects 'global_max_parallel_requests'
+
+    data["metadata"]["global_max_parallel_requests"]
+    """
+    global_max_parallel_requests = 0
+    _api_key = "sk-12345"
+    _api_key = hash_token("sk-12345")
+    user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=100)
+    local_cache = DualCache()
+    parallel_request_handler = MaxParallelRequestsHandler()
+
+    for _ in range(3):
+        try:
+            await parallel_request_handler.async_pre_call_hook(
+                user_api_key_dict=user_api_key_dict,
+                cache=local_cache,
+                data={
+                    "metadata": {
+                        "global_max_parallel_requests": global_max_parallel_requests
+                    }
+                },
+                call_type="",
+            )
+            pytest.fail("Expected call to fail")
+        except Exception as e:
+            pass
+
+
@pytest.mark.asyncio
async def test_pre_call_hook():
    """

64  litellm/tests/test_router_cooldowns.py  Normal file

@ -0,0 +1,64 @@
+#### What this tests ####
+# This tests calling router with fallback models
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+from litellm import Router
+from litellm.integrations.custom_logger import CustomLogger
+import openai, httpx
+
+
+@pytest.mark.asyncio
+async def test_cooldown_badrequest_error():
+    """
+    Test 1. It SHOULD NOT cooldown a deployment on a BadRequestError
+    """
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+            }
+        ],
+        debug_level="DEBUG",
+        set_verbose=True,
+        cooldown_time=300,
+        num_retries=0,
+        allowed_fails=0,
+    )
+
+    # Act & Assert
+    try:
+
+        response = await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "gm"}],
+            bad_param=200,
+        )
+    except:
+        pass
+
+    await asyncio.sleep(3)  # wait for deployment to get cooled-down
+
+    response = await router.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "gm"}],
+        mock_response="hello",
+    )
+
+    assert response is not None
+
+    print(response)
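The new test file above configures the Router with `allowed_fails=0` and `cooldown_time=300` and asserts that a BadRequestError alone must not take the deployment out of rotation. As a rough illustration of how `allowed_fails` / `cooldown_time` style settings interact (this is not the Router's actual bookkeeping, which also filters on status code as shown earlier):

```python
# Conceptual sketch: cool a deployment down once failures exceed the allowed
# count, and keep it excluded until the cooldown window expires.
import time
from collections import defaultdict

ALLOWED_FAILS = 0
COOLDOWN_TIME = 300  # seconds

fail_counts = defaultdict(int)
cooldown_until: dict = {}


def record_failure(deployment: str) -> None:
    fail_counts[deployment] += 1
    if fail_counts[deployment] > ALLOWED_FAILS:
        cooldown_until[deployment] = time.time() + COOLDOWN_TIME


def is_available(deployment: str) -> bool:
    return time.time() >= cooldown_until.get(deployment, 0)


record_failure("azure/chatgpt-v-2")
print(is_available("azure/chatgpt-v-2"))  # False for the next 300 seconds
```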
@ -82,7 +82,7 @@ def test_async_fallbacks(caplog):
    # Define the expected log messages
    # - error request, falling back notice, success notice
    expected_logs = [
-        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}} \nModel: gpt-3.5-turbo\nAPI Base: https://api.openai.com\nMessages: [{'content': 'Hello, how are you?', 'role': 'user'}]\nmodel_group: gpt-3.5-turbo\n\ndeployment: gpt-3.5-turbo\n\x1b[0m",
+        "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m",
        "Falling back to model_group = azure/gpt-3.5-turbo",
        "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m",
        "Successful fallback b/w models.",
@ -1,27 +1,10 @@
-from typing import List, Optional, Union, Iterable, cast
+from typing import List, Optional, Union, Iterable

-from pydantic import ConfigDict, BaseModel, validator, VERSION
+from pydantic import BaseModel, validator

from typing_extensions import Literal, Required, TypedDict


-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config() -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow)  # type: ignore
-
-    return model_config
-
-
class ChatCompletionSystemMessageParam(TypedDict, total=False):
    content: Required[str]
    """The contents of the system message."""

@ -208,4 +191,6 @@ class CompletionRequest(BaseModel):
    api_key: Optional[str] = None
    model_list: Optional[List[str]] = None

-    model_config = get_model_config()
+    class Config:
+        extra = "allow"
+        protected_namespaces = ()
@ -1,23 +1,6 @@
from typing import List, Optional, Union

-from pydantic import ConfigDict, BaseModel, validator, VERSION
+from pydantic import BaseModel, validator


-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed)  # type: ignore
-
-    return model_config
-
-
class EmbeddingRequest(BaseModel):

@ -34,4 +17,7 @@ class EmbeddingRequest(BaseModel):
    litellm_call_id: Optional[str] = None
    litellm_logging_obj: Optional[dict] = None
    logger_fn: Optional[str] = None
-    model_config = get_model_config()
+
+    class Config:
+        # allow kwargs
+        extra = "allow"
@ -1,42 +1,19 @@
from typing import List, Optional, Union, Dict, Tuple, Literal, TypedDict
import httpx
-from pydantic import (
-    ConfigDict,
-    BaseModel,
-    validator,
-    Field,
-    __version__ as pydantic_version,
-    VERSION,
-)
+from pydantic import BaseModel, validator, Field
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
import uuid, enum


-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed)  # type: ignore
-
-    return model_config
-
-
class ModelConfig(BaseModel):
    model_name: str
    litellm_params: Union[CompletionRequest, EmbeddingRequest]
    tpm: int
    rpm: int

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


class RouterConfig(BaseModel):

@ -67,7 +44,8 @@ class RouterConfig(BaseModel):
        "latency-based-routing",
    ] = "simple-shuffle"

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


class UpdateRouterConfig(BaseModel):

@ -87,7 +65,8 @@ class UpdateRouterConfig(BaseModel):
    fallbacks: Optional[List[dict]] = None
    context_window_fallbacks: Optional[List[dict]] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


class ModelInfo(BaseModel):

@ -105,7 +84,8 @@ class ModelInfo(BaseModel):
        id = str(id)
        super().__init__(id=id, **params)

-    model_config = get_model_config()
+    class Config:
+        extra = "allow"

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator

@ -200,14 +180,8 @@ class GenericLiteLLMParams(BaseModel):
        max_retries = int(max_retries)  # cast to int
        super().__init__(max_retries=max_retries, **args, **params)

-    model_config = get_model_config(arbitrary_types_allowed=True)
-    if pydantic_version.startswith("1"):
-        # pydantic v2 warns about using a Config class.
-        # But without this, pydantic v1 will raise an error:
-        # RuntimeError: no validator found for <class 'openai.Timeout'>,
-        # see `arbitrary_types_allowed` in Config
-        # Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1.
    class Config:
+        extra = "allow"
        arbitrary_types_allowed = True

    def __contains__(self, key):

@ -267,15 +241,8 @@ class LiteLLM_Params(GenericLiteLLMParams):
        max_retries = int(max_retries)  # cast to int
        super().__init__(max_retries=max_retries, **args, **params)

-    model_config = get_model_config(arbitrary_types_allowed=True)
-
-    if pydantic_version.startswith("1"):
-        # pydantic v2 warns about using a Config class.
-        # But without this, pydantic v1 will raise an error:
-        # RuntimeError: no validator found for <class 'openai.Timeout'>,
-        # see `arbitrary_types_allowed` in Config
-        # Putting arbitrary_types_allowed = True in the ConfigDict doesn't work in pydantic v1.
    class Config:
+        extra = "allow"
        arbitrary_types_allowed = True

    def __contains__(self, key):

@ -306,7 +273,8 @@ class updateDeployment(BaseModel):
    litellm_params: Optional[updateLiteLLMParams] = None
    model_info: Optional[ModelInfo] = None

-    model_config = get_model_config()
+    class Config:
+        protected_namespaces = ()


class LiteLLMParamsTypedDict(TypedDict, total=False):

@ -380,7 +348,9 @@ class Deployment(BaseModel):
        # if using pydantic v1
        return self.dict(**kwargs)

-    model_config = get_model_config()
+    class Config:
+        extra = "allow"
+        protected_namespaces = ()

    def __contains__(self, key):
        # Define custom behavior for the 'in' operator

174  litellm/utils.py
@ -19,7 +19,7 @@ from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
-from pydantic import ConfigDict, BaseModel, VERSION
+from pydantic import BaseModel
import aiohttp
import textwrap
import logging

@ -185,23 +185,6 @@ last_fetched_at_keys = None
# }


-# Function to get Pydantic version
-def is_pydantic_v2() -> int:
-    return int(VERSION.split(".")[0])
-
-
-def get_model_config(arbitrary_types_allowed: bool = False) -> ConfigDict:
-    # Version-specific configuration
-    if is_pydantic_v2() >= 2:
-        model_config = ConfigDict(extra="allow", arbitrary_types_allowed=arbitrary_types_allowed, protected_namespaces=())  # type: ignore
-    else:
-        from pydantic import Extra
-
-        model_config = ConfigDict(extra=Extra.allow, arbitrary_types_allowed=arbitrary_types_allowed)  # type: ignore
-
-    return model_config
-
-
class UnsupportedParamsError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code

@ -348,7 +331,10 @@ class HiddenParams(OpenAIObject):
    original_response: Optional[str] = None
    model_id: Optional[str] = None  # used in Router for individual deployments
    api_base: Optional[str] = None  # returns api base used for making completion call
-    model_config = get_model_config()
+
+    class Config:
+        extra = "allow"
+        protected_namespaces = ()

    def get(self, key, default=None):
        # Custom .get() method to access attributes with a default value if the attribute doesn't exist

@ -2579,7 +2565,7 @@ class Logging:
                            response_obj=result,
                            start_time=start_time,
                            end_time=end_time,
-                        )
+                        )  # type: ignore
                    if callable(callback):  # custom logger functions
                        await customLogger.async_log_event(
                            kwargs=self.model_call_details,

@ -6778,7 +6764,7 @@ def get_max_tokens(model: str):
            raise Exception()
    except:
        raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            f"Model {model} isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
        )

@ -8155,8 +8141,11 @@ def exception_type(
        # Common Extra information needed for all providers
        # We pass num retries, api_base, vertex_deployment etc to the exception here
        ################################################################################
+        extra_information = ""
-        _api_base = litellm.get_api_base(model=model, optional_params=extra_kwargs)
+        try:
+            _api_base = litellm.get_api_base(
+                model=model, optional_params=extra_kwargs
+            )
        messages = litellm.get_first_chars_messages(kwargs=completion_kwargs)
        _vertex_project = extra_kwargs.get("vertex_project")
        _vertex_location = extra_kwargs.get("vertex_location")

@ -8182,6 +8171,9 @@ def exception_type(
            extra_information = _add_key_name_and_team_to_alert(
                request_info=extra_information, metadata=_metadata
            )
+        except:
+            # DO NOT LET this Block raising the original exception
+            pass

        ################################################################################
        # End of Common Extra information Needed for all providers

@ -8194,9 +8186,10 @@ def exception_type(
            if "Request Timeout Error" in error_str or "Request timed out" in error_str:
                exception_mapping_worked = True
                raise Timeout(
-                    message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}",
+                    message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
+                    litellm_debug_info=extra_information,
                )

            if (

@ -8226,10 +8219,11 @@ def exception_type(
                if "This model's maximum context length is" in error_str:
                    exception_mapping_worked = True
                    raise ContextWindowExceededError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        llm_provider=custom_llm_provider,
                        model=model,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif (
                    "invalid_request_error" in error_str

@ -8237,10 +8231,11 @@ def exception_type(
                ):
                    exception_mapping_worked = True
                    raise NotFoundError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        llm_provider=custom_llm_provider,
                        model=model,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif (
                    "invalid_request_error" in error_str

@ -8248,10 +8243,11 @@ def exception_type(
                ):
                    exception_mapping_worked = True
                    raise ContentPolicyViolationError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        llm_provider=custom_llm_provider,
                        model=model,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif (
                    "invalid_request_error" in error_str

@ -8259,17 +8255,19 @@ def exception_type(
                ):
                    exception_mapping_worked = True
                    raise BadRequestError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        llm_provider=custom_llm_provider,
                        model=model,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif "Request too large" in error_str:
                    raise RateLimitError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        model=model,
                        llm_provider=custom_llm_provider,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif (
                    "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"

@ -8277,10 +8275,11 @@ def exception_type(
                ):
                    exception_mapping_worked = True
                    raise AuthenticationError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                        llm_provider=custom_llm_provider,
                        model=model,
                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
                    )
                elif "Mistral API raised a streaming error" in error_str:
                    exception_mapping_worked = True
|
@ -8289,82 +8288,92 @@ def exception_type(
|
||||||
)
|
)
|
||||||
raise APIError(
|
raise APIError(
|
||||||
status_code=500,
|
status_code=500,
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
model=model,
|
model=model,
|
||||||
request=_request,
|
request=_request,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif hasattr(original_exception, "status_code"):
|
elif hasattr(original_exception, "status_code"):
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
if original_exception.status_code == 401:
|
if original_exception.status_code == 401:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise AuthenticationError(
|
raise AuthenticationError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
model=model,
|
model=model,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 404:
|
elif original_exception.status_code == 404:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise NotFoundError(
|
raise NotFoundError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 408:
|
elif original_exception.status_code == 408:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise Timeout(
|
raise Timeout(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 422:
|
elif original_exception.status_code == 422:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise BadRequestError(
|
raise BadRequestError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 429:
|
elif original_exception.status_code == 429:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise RateLimitError(
|
raise RateLimitError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 503:
|
elif original_exception.status_code == 503:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise ServiceUnavailableError(
|
raise ServiceUnavailableError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 504: # gateway timeout error
|
elif original_exception.status_code == 504: # gateway timeout error
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise Timeout(
|
raise Timeout(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise APIError(
|
raise APIError(
|
||||||
status_code=original_exception.status_code,
|
status_code=original_exception.status_code,
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
model=model,
|
model=model,
|
||||||
request=original_exception.request,
|
request=original_exception.request,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
||||||
raise APIConnectionError(
|
raise APIConnectionError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
request=httpx.Request(
|
request=httpx.Request(
|
||||||
method="POST", url="https://api.openai.com/v1/"
|
method="POST", url="https://api.openai.com/v1/"
|
||||||
),
|
),
|
||||||
|
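Throughout this hunk the change is the same: the verbose request context (api base, messages, vertex project, and so on) is dropped from the user-facing message and passed separately as `litellm_debug_info`. A small illustrative sketch of that pattern, using a hypothetical exception class rather than litellm's own types:

```python
# Illustrative only: keep the user-facing message short and attach verbose
# request context as a separate attribute for logging and alerting.
from typing import Optional


class ProviderError(Exception):
    def __init__(self, message: str, litellm_debug_info: Optional[str] = None):
        super().__init__(message)
        self.litellm_debug_info = litellm_debug_info


try:
    raise ProviderError(
        "AzureException - invalid api key",
        litellm_debug_info="api_base=https://example.azure.com, model_group=gpt-4",
    )
except ProviderError as e:
    print(str(e))                # short message for callers
    print(e.litellm_debug_info)  # full context still available for logs/alerts
```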
@ -8529,6 +8538,28 @@ def exception_type(
                    model=model,
                    response=original_exception.response,
                )
+        elif custom_llm_provider == "predibase":
+            if "authorization denied for" in error_str:
+                exception_mapping_worked = True
+
+                # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception
+                if (
+                    error_str is not None
+                    and isinstance(error_str, str)
+                    and "bearer" in error_str.lower()
+                ):
+                    # only keep the first 10 chars after the occurnence of "bearer"
+                    _bearer_token_start_index = error_str.lower().find("bearer")
+                    error_str = error_str[: _bearer_token_start_index + 14]
+                    error_str += "XXXXXXX" + '"'
+
+                raise AuthenticationError(
+                    message=f"PredibaseException: Authentication Error - {error_str}",
+                    llm_provider="predibase",
+                    model=model,
+                    response=original_exception.response,
+                    litellm_debug_info=extra_information,
+                )
        elif custom_llm_provider == "bedrock":
            if (
                "too many tokens" in error_str
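The new Predibase branch above truncates the error string a few characters past the word "bearer" and masks the remainder, so the raw API key never reaches the raised `AuthenticationError` (this is what the `test_litellm_predibase_exception` test earlier in this diff asserts). The same step as a standalone helper; the function name and the sample string are mine, and the 14-character window is taken from the diff.

```python
# Standalone sketch of the redaction step used in the Predibase branch above.
def redact_bearer_token(error_str: str) -> str:
    lowered = error_str.lower()
    if "bearer" in lowered:
        start = lowered.find("bearer")
        # keep only a short prefix after "bearer", then mask the rest
        error_str = error_str[: start + 14] + "XXXXXXX" + '"'
    return error_str


print(redact_bearer_token('authorization denied for token "Bearer pb_secret_12345"'))
# -> ...token "Bearer pb_secrXXXXXXX"
```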
@ -8666,10 +8697,11 @@ def exception_type(
            ):
                exception_mapping_worked = True
                raise BadRequestError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    model=model,
                    llm_provider="vertex_ai",
                    response=original_exception.response,
+                    litellm_debug_info=extra_information,
                )
            elif (
                "None Unknown Error." in error_str

@ -8677,26 +8709,29 @@ def exception_type(
            ):
                exception_mapping_worked = True
                raise APIError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    status_code=500,
                    model=model,
                    llm_provider="vertex_ai",
                    request=original_exception.request,
+                    litellm_debug_info=extra_information,
                )
            elif "403" in error_str:
                exception_mapping_worked = True
                raise BadRequestError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    model=model,
                    llm_provider="vertex_ai",
                    response=original_exception.response,
+                    litellm_debug_info=extra_information,
                )
            elif "The response was blocked." in error_str:
                exception_mapping_worked = True
                raise UnprocessableEntityError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    model=model,
                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                    response=httpx.Response(
                        status_code=429,
                        request=httpx.Request(

@ -8713,9 +8748,10 @@ def exception_type(
            ):
                exception_mapping_worked = True
                raise RateLimitError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    model=model,
                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                    response=httpx.Response(
                        status_code=429,
                        request=httpx.Request(

@ -8728,18 +8764,20 @@ def exception_type(
            if original_exception.status_code == 400:
                exception_mapping_worked = True
                raise BadRequestError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    model=model,
                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                    response=original_exception.response,
                )
            if original_exception.status_code == 500:
                exception_mapping_worked = True
                raise APIError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                    status_code=500,
                    model=model,
                    llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                    request=original_exception.request,
                )
        elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":

@ -9340,25 +9378,28 @@ def exception_type(
                exception_mapping_worked = True
                raise APIError(
                    status_code=500,
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                    llm_provider="azure",
                    model=model,
+                    litellm_debug_info=extra_information,
                    request=httpx.Request(method="POST", url="https://openai.com/"),
                )
            elif "This model's maximum context length is" in error_str:
                exception_mapping_worked = True
                raise ContextWindowExceededError(
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                    llm_provider="azure",
                    model=model,
+                    litellm_debug_info=extra_information,
                    response=original_exception.response,
                )
            elif "DeploymentNotFound" in error_str:
                exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise NotFoundError(
|
raise NotFoundError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif (
|
elif (
|
||||||
|
@ -9370,17 +9411,19 @@ def exception_type(
|
||||||
):
|
):
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise ContentPolicyViolationError(
|
raise ContentPolicyViolationError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif "invalid_request_error" in error_str:
|
elif "invalid_request_error" in error_str:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise BadRequestError(
|
raise BadRequestError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif (
|
elif (
|
||||||
|
@ -9389,9 +9432,10 @@ def exception_type(
|
||||||
):
|
):
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise AuthenticationError(
|
raise AuthenticationError(
|
||||||
message=f"{exception_provider} - {original_exception.message} {extra_information}",
|
message=f"{exception_provider} - {original_exception.message}",
|
||||||
llm_provider=custom_llm_provider,
|
llm_provider=custom_llm_provider,
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif hasattr(original_exception, "status_code"):
|
elif hasattr(original_exception, "status_code"):
|
||||||
|
@ -9399,55 +9443,62 @@ def exception_type(
|
||||||
if original_exception.status_code == 401:
|
if original_exception.status_code == 401:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise AuthenticationError(
|
raise AuthenticationError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 408:
|
elif original_exception.status_code == 408:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise Timeout(
|
raise Timeout(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
)
|
)
|
||||||
if original_exception.status_code == 422:
|
if original_exception.status_code == 422:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise BadRequestError(
|
raise BadRequestError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 429:
|
elif original_exception.status_code == 429:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise RateLimitError(
|
raise RateLimitError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 503:
|
elif original_exception.status_code == 503:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise ServiceUnavailableError(
|
raise ServiceUnavailableError(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
model=model,
|
model=model,
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
response=original_exception.response,
|
response=original_exception.response,
|
||||||
)
|
)
|
||||||
elif original_exception.status_code == 504: # gateway timeout error
|
elif original_exception.status_code == 504: # gateway timeout error
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise Timeout(
|
raise Timeout(
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
raise APIError(
|
raise APIError(
|
||||||
status_code=original_exception.status_code,
|
status_code=original_exception.status_code,
|
||||||
message=f"AzureException - {original_exception.message} {extra_information}",
|
message=f"AzureException - {original_exception.message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
model=model,
|
model=model,
|
||||||
request=httpx.Request(
|
request=httpx.Request(
|
||||||
method="POST", url="https://openai.com/"
|
method="POST", url="https://openai.com/"
|
||||||
|
@ -9456,9 +9507,10 @@ def exception_type(
|
||||||
else:
|
else:
|
||||||
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
# if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
|
||||||
raise APIConnectionError(
|
raise APIConnectionError(
|
||||||
message=f"{exception_provider} - {message} {extra_information}",
|
message=f"{exception_provider} - {message}",
|
||||||
llm_provider="azure",
|
llm_provider="azure",
|
||||||
model=model,
|
model=model,
|
||||||
|
litellm_debug_info=extra_information,
|
||||||
request=httpx.Request(method="POST", url="https://openai.com/"),
|
request=httpx.Request(method="POST", url="https://openai.com/"),
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
|
|
|
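The hunks above stop interpolating `{extra_information}` into the user-facing exception message and instead pass it through the new `litellm_debug_info` keyword on each mapped exception. The snippet below is not part of the diff; it is a minimal sketch of how a caller might surface that field, assuming the mapped exception classes keep the keyword value available as an attribute:

```python
# Sketch only: assumes the mapped exceptions expose `litellm_debug_info`
# as an attribute after this change; the deployment name is hypothetical.
import litellm

try:
    litellm.completion(
        model="azure/my-deployment",  # hypothetical Azure deployment
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.exceptions.AuthenticationError as e:
    # The message no longer embeds the extra debug payload...
    print(e.message)
    # ...which should now travel separately, if the attribute is set.
    print(getattr(e, "litellm_debug_info", None))
```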
@@ -1744,6 +1744,30 @@
         "litellm_provider": "openrouter",
         "mode": "chat"
     },
+    "openrouter/openai/gpt-4o": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
+    "openrouter/openai/gpt-4o-2024-05-13": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000005,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_vision": true
+    },
     "openrouter/openai/gpt-4-vision-preview": {
         "max_tokens": 130000,
         "input_cost_per_token": 0.00001,
@@ -2943,6 +2967,24 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama3": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/llama3:70b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/mistral": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
@@ -2952,6 +2994,42 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/mistral-7B-Instruct-v0.1": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mistral-7B-Instruct-v0.2": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x7B-Instruct-v0.1": {
+        "max_tokens": 32768,
+        "max_input_tokens": 32768,
+        "max_output_tokens": 32768,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
+    "ollama/mixtral-8x22B-Instruct-v0.1": {
+        "max_tokens": 65536,
+        "max_input_tokens": 65536,
+        "max_output_tokens": 65536,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/codellama": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
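The new price-map entries above register context windows, zero local cost, and chat mode for the added Ollama and OpenRouter models. As a hedged sketch (not part of the diff), this is how such entries are typically looked up, assuming `litellm.get_model_info` and `litellm.get_max_tokens` read this JSON as documented for this litellm version:

```python
# Sketch: resolving the newly added entries from the model price/context map.
import litellm

# Assumes "ollama/llama3" resolves to the entry added above.
info = litellm.get_model_info("ollama/llama3")
print(info["max_tokens"], info["litellm_provider"], info["mode"])  # expected: 8192 ollama chat

# Assumes get_max_tokens returns the "max_tokens" field of the entry.
print(litellm.get_max_tokens("openrouter/openai/gpt-4o"))  # expected: 4096
```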
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.37.10"
+version = "1.37.12"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -79,7 +79,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.37.10"
+version = "1.37.12"
 version_files = [
     "pyproject.toml:^version"
 ]
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -23,12 +23,44 @@ import {
   AccordionHeader,
   AccordionList,
 } from "@tremor/react";
-import { TabPanel, TabPanels, TabGroup, TabList, Tab, Icon } from "@tremor/react";
-import { getCallbacksCall, setCallbacksCall, serviceHealthCheck } from "./networking";
-import { Modal, Form, Input, Select, Button as Button2, message } from "antd";
-import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon } from "@heroicons/react/outline";
+import {
+  TabPanel,
+  TabPanels,
+  TabGroup,
+  TabList,
+  Tab,
+  Icon,
+} from "@tremor/react";
+import {
+  getCallbacksCall,
+  setCallbacksCall,
+  getGeneralSettingsCall,
+  serviceHealthCheck,
+  updateConfigFieldSetting,
+  deleteConfigFieldSetting,
+} from "./networking";
+import {
+  Modal,
+  Form,
+  Input,
+  Select,
+  Button as Button2,
+  message,
+  InputNumber,
+} from "antd";
+import {
+  InformationCircleIcon,
+  PencilAltIcon,
+  PencilIcon,
+  StatusOnlineIcon,
+  TrashIcon,
+  RefreshIcon,
+  CheckCircleIcon,
+  XCircleIcon,
+  QuestionMarkCircleIcon,
+} from "@heroicons/react/outline";
 import StaticGenerationSearchParamsBailoutProvider from "next/dist/client/components/static-generation-searchparams-bailout-provider";
-import AddFallbacks from "./add_fallbacks"
+import AddFallbacks from "./add_fallbacks";
 import openai from "openai";
 import Paragraph from "antd/es/skeleton/Paragraph";
@@ -36,7 +68,7 @@ interface GeneralSettingsPageProps {
   accessToken: string | null;
   userRole: string | null;
   userID: string | null;
-  modelData: any
+  modelData: any;
 }
 
 async function testFallbackModelResponse(
@@ -65,24 +97,39 @@ async function testFallbackModelResponse(
       },
     ],
     // @ts-ignore
-    mock_testing_fallbacks: true
+    mock_testing_fallbacks: true,
   });
 
   message.success(
     <span>
-      Test model=<strong>{selectedModel}</strong>, received model=<strong>{response.model}</strong>.
-      See <a href="#" onClick={() => window.open('https://docs.litellm.ai/docs/proxy/reliability', '_blank')} style={{ textDecoration: 'underline', color: 'blue' }}>curl</a>
+      Test model=<strong>{selectedModel}</strong>, received model=
+      <strong>{response.model}</strong>. See{" "}
+      <a
+        href="#"
+        onClick={() =>
+          window.open(
+            "https://docs.litellm.ai/docs/proxy/reliability",
+            "_blank"
+          )
+        }
+        style={{ textDecoration: "underline", color: "blue" }}
+      >
+        curl
+      </a>
     </span>
   );
   } catch (error) {
-    message.error(`Error occurred while generating model response. Please try again. Error: ${error}`, 20);
+    message.error(
+      `Error occurred while generating model response. Please try again. Error: ${error}`,
+      20
+    );
   }
 }
 
 interface AccordionHeroProps {
   selectedStrategy: string | null;
   strategyArgs: routingStrategyArgs;
-  paramExplanation: { [key: string]: string }
+  paramExplanation: { [key: string]: string };
 }
 
 interface routingStrategyArgs {
@@ -90,17 +137,30 @@ interface routingStrategyArgs {
   lowest_latency_buffer?: number;
 }
 
-const defaultLowestLatencyArgs: routingStrategyArgs = {
-  "ttl": 3600,
-  "lowest_latency_buffer": 0
+interface generalSettingsItem {
+  field_name: string;
+  field_type: string;
+  field_value: any;
+  field_description: string;
+  stored_in_db: boolean | null;
 }
 
-export const AccordionHero: React.FC<AccordionHeroProps> = ({ selectedStrategy, strategyArgs, paramExplanation }) => (
+const defaultLowestLatencyArgs: routingStrategyArgs = {
+  ttl: 3600,
+  lowest_latency_buffer: 0,
+};
+
+export const AccordionHero: React.FC<AccordionHeroProps> = ({
+  selectedStrategy,
+  strategyArgs,
+  paramExplanation,
+}) => (
   <Accordion>
-    <AccordionHeader className="text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong">Routing Strategy Specific Args</AccordionHeader>
+    <AccordionHeader className="text-sm font-medium text-tremor-content-strong dark:text-dark-tremor-content-strong">
+      Routing Strategy Specific Args
+    </AccordionHeader>
     <AccordionBody>
-      {
-        selectedStrategy == "latency-based-routing" ?
+      {selectedStrategy == "latency-based-routing" ? (
         <Card>
           <Table>
             <TableHead>
@@ -114,13 +174,24 @@ export const AccordionHero: React.FC<AccordionHeroProps> = ({
               <TableRow key={param}>
                 <TableCell>
                   <Text>{param}</Text>
-                  <p style={{fontSize: '0.65rem', color: '#808080', fontStyle: 'italic'}} className="mt-1">{paramExplanation[param]}</p>
+                  <p
+                    style={{
+                      fontSize: "0.65rem",
+                      color: "#808080",
+                      fontStyle: "italic",
+                    }}
+                    className="mt-1"
+                  >
+                    {paramExplanation[param]}
+                  </p>
                 </TableCell>
                 <TableCell>
                   <TextInput
                     name={param}
                     defaultValue={
-                      typeof value === 'object' ? JSON.stringify(value, null, 2) : value.toString()
+                      typeof value === "object"
+                        ? JSON.stringify(value, null, 2)
+                        : value.toString()
                     }
                   />
                 </TableCell>
@@ -129,8 +200,9 @@ export const AccordionHero: React.FC<AccordionHeroProps> = ({
             </TableBody>
           </Table>
         </Card>
-      : <Text>No specific settings</Text>
-      }
+      ) : (
+        <Text>No specific settings</Text>
+      )}
     </AccordionBody>
   </Accordion>
 );
@@ -139,26 +211,38 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
   accessToken,
   userRole,
   userID,
-  modelData
+  modelData,
 }) => {
-  const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>({});
+  const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>(
+    {}
+  );
+  const [generalSettingsDict, setGeneralSettingsDict] = useState<{
+    [key: string]: any;
+  }>({});
+  const [generalSettings, setGeneralSettings] = useState<generalSettingsItem[]>(
+    []
+  );
   const [isModalVisible, setIsModalVisible] = useState(false);
   const [form] = Form.useForm();
   const [selectedCallback, setSelectedCallback] = useState<string | null>(null);
-  const [selectedStrategy, setSelectedStrategy] = useState<string | null>(null)
-  const [strategySettings, setStrategySettings] = useState<routingStrategyArgs | null>(null);
+  const [selectedStrategy, setSelectedStrategy] = useState<string | null>(null);
+  const [strategySettings, setStrategySettings] =
+    useState<routingStrategyArgs | null>(null);
 
   let paramExplanation: { [key: string]: string } = {
-    "routing_strategy_args": "(dict) Arguments to pass to the routing strategy",
-    "routing_strategy": "(string) Routing strategy to use",
-    "allowed_fails": "(int) Number of times a deployment can fail before being added to cooldown",
-    "cooldown_time": "(int) time in seconds to cooldown a deployment after failure",
-    "num_retries": "(int) Number of retries for failed requests. Defaults to 0.",
-    "timeout": "(float) Timeout for requests. Defaults to None.",
-    "retry_after": "(int) Minimum time to wait before retrying a failed request",
-    "ttl": "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",
-    "lowest_latency_buffer": "(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency)."
-  }
+    routing_strategy_args: "(dict) Arguments to pass to the routing strategy",
+    routing_strategy: "(string) Routing strategy to use",
+    allowed_fails:
+      "(int) Number of times a deployment can fail before being added to cooldown",
+    cooldown_time:
+      "(int) time in seconds to cooldown a deployment after failure",
+    num_retries: "(int) Number of retries for failed requests. Defaults to 0.",
+    timeout: "(float) Timeout for requests. Defaults to None.",
+    retry_after: "(int) Minimum time to wait before retrying a failed request",
+    ttl: "(int) Sliding window to look back over when calculating the average latency of a deployment. Default - 1 hour (in seconds).",
+    lowest_latency_buffer:
+      "(float) Shuffle between deployments within this % of the lowest latency. Default - 0 (i.e. always pick lowest latency).",
+  };
 
   useEffect(() => {
     if (!accessToken || !userRole || !userID) {
@@ -169,6 +253,10 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
       let router_settings = data.router_settings;
       setRouterSettings(router_settings);
     });
+    getGeneralSettingsCall(accessToken).then((data) => {
+      let general_settings = data;
+      setGeneralSettings(general_settings);
+    });
   }, [accessToken, userRole, userID]);
 
   const handleAddCallback = () => {
@@ -190,8 +278,8 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
       return;
     }
 
-    console.log(`received key: ${key}`)
-    console.log(`routerSettings['fallbacks']: ${routerSettings['fallbacks']}`)
+    console.log(`received key: ${key}`);
+    console.log(`routerSettings['fallbacks']: ${routerSettings["fallbacks"]}`);
 
     routerSettings["fallbacks"].map((dict: { [key: string]: any }) => {
       // Check if the dictionary has the specified key and delete it if present
@@ -202,19 +290,74 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
     });
 
     const payload = {
-      router_settings: routerSettings
+      router_settings: routerSettings,
     };
 
     try {
       await setCallbacksCall(accessToken, payload);
       setRouterSettings({ ...routerSettings });
-      setSelectedStrategy(routerSettings["routing_strategy"])
+      setSelectedStrategy(routerSettings["routing_strategy"]);
       message.success("Router settings updated successfully");
     } catch (error) {
       message.error("Failed to update router settings: " + error, 20);
     }
+  };
+
+  const handleInputChange = (fieldName: string, newValue: any) => {
+    // Update the value in the state
+    const updatedSettings = generalSettings.map((setting) =>
+      setting.field_name === fieldName
+        ? { ...setting, field_value: newValue }
+        : setting
+    );
+    setGeneralSettings(updatedSettings);
+  };
+
+  const handleUpdateField = (fieldName: string, idx: number) => {
+    if (!accessToken) {
+      return;
     }
+
+    let fieldValue = generalSettings[idx].field_value;
+
+    if (fieldValue == null || fieldValue == undefined) {
+      return;
+    }
+    try {
+      updateConfigFieldSetting(accessToken, fieldName, fieldValue);
+      // update value in state
+
+      const updatedSettings = generalSettings.map((setting) =>
+        setting.field_name === fieldName
+          ? { ...setting, stored_in_db: true }
+          : setting
+      );
+      setGeneralSettings(updatedSettings);
+    } catch (error) {
+      // do something
+    }
+  };
+
+  const handleResetField = (fieldName: string, idx: number) => {
+    if (!accessToken) {
+      return;
+    }
+
+    try {
+      deleteConfigFieldSetting(accessToken, fieldName);
+      // update value in state
+
+      const updatedSettings = generalSettings.map((setting) =>
+        setting.field_name === fieldName
+          ? { ...setting, stored_in_db: null, field_value: null }
+          : setting
+      );
+      setGeneralSettings(updatedSettings);
+    } catch (error) {
+      // do something
+    }
+  };
+
   const handleSaveChanges = (router_settings: any) => {
     if (!accessToken) {
       return;
@@ -223,39 +366,55 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
     console.log("router_settings", router_settings);
 
     const updatedVariables = Object.fromEntries(
-      Object.entries(router_settings).map(([key, value]) => {
-        if (key !== 'routing_strategy_args' && key !== "routing_strategy") {
-          return [key, (document.querySelector(`input[name="${key}"]`) as HTMLInputElement)?.value || value];
-        }
-        else if (key == "routing_strategy") {
-          return [key, selectedStrategy]
-        }
-        else if (key == "routing_strategy_args" && selectedStrategy == "latency-based-routing") {
-          let setRoutingStrategyArgs: routingStrategyArgs = {}
+      Object.entries(router_settings)
+        .map(([key, value]) => {
+          if (key !== "routing_strategy_args" && key !== "routing_strategy") {
+            return [
+              key,
+              (
+                document.querySelector(
+                  `input[name="${key}"]`
+                ) as HTMLInputElement
+              )?.value || value,
+            ];
+          } else if (key == "routing_strategy") {
+            return [key, selectedStrategy];
+          } else if (
+            key == "routing_strategy_args" &&
+            selectedStrategy == "latency-based-routing"
+          ) {
+            let setRoutingStrategyArgs: routingStrategyArgs = {};
 
-          const lowestLatencyBufferElement = document.querySelector(`input[name="lowest_latency_buffer"]`) as HTMLInputElement;
-          const ttlElement = document.querySelector(`input[name="ttl"]`) as HTMLInputElement;
+            const lowestLatencyBufferElement = document.querySelector(
+              `input[name="lowest_latency_buffer"]`
+            ) as HTMLInputElement;
+            const ttlElement = document.querySelector(
+              `input[name="ttl"]`
+            ) as HTMLInputElement;
 
             if (lowestLatencyBufferElement?.value) {
-              setRoutingStrategyArgs["lowest_latency_buffer"] = Number(lowestLatencyBufferElement.value)
+              setRoutingStrategyArgs["lowest_latency_buffer"] = Number(
+                lowestLatencyBufferElement.value
+              );
             }
 
             if (ttlElement?.value) {
-              setRoutingStrategyArgs["ttl"] = Number(ttlElement.value)
+              setRoutingStrategyArgs["ttl"] = Number(ttlElement.value);
            }
 
-            console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`)
-            return [
-              "routing_strategy_args", setRoutingStrategyArgs
-            ]
+            console.log(`setRoutingStrategyArgs: ${setRoutingStrategyArgs}`);
+            return ["routing_strategy_args", setRoutingStrategyArgs];
           }
           return null;
-      }).filter(entry => entry !== null && entry !== undefined) as Iterable<[string, unknown]>
+        })
+        .filter((entry) => entry !== null && entry !== undefined) as Iterable<
+        [string, unknown]
+      >
     );
     console.log("updatedVariables", updatedVariables);
 
     const payload = {
-      router_settings: updatedVariables
+      router_settings: updatedVariables,
     };
 
     try {
@@ -267,19 +426,17 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
     message.success("router settings updated successfully");
   };
 
-
-
   if (!accessToken) {
     return null;
   }
 
-
   return (
     <div className="w-full mx-4">
       <TabGroup className="gap-2 p-8 h-[75vh] w-full mt-2">
         <TabList variant="line" defaultValue="1">
-          <Tab value="1">General Settings</Tab>
+          <Tab value="1">Loadbalancing</Tab>
           <Tab value="2">Fallbacks</Tab>
+          <Tab value="3">General</Tab>
         </TabList>
         <TabPanels>
           <TabPanel>
@ -294,27 +451,55 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
|
||||||
</TableRow>
|
</TableRow>
|
||||||
</TableHead>
|
</TableHead>
|
||||||
<TableBody>
|
<TableBody>
|
||||||
{Object.entries(routerSettings).filter(([param, value]) => param != "fallbacks" && param != "context_window_fallbacks" && param != "routing_strategy_args").map(([param, value]) => (
|
{Object.entries(routerSettings)
|
||||||
|
.filter(
|
||||||
|
([param, value]) =>
|
||||||
|
param != "fallbacks" &&
|
||||||
|
param != "context_window_fallbacks" &&
|
||||||
|
param != "routing_strategy_args"
|
||||||
|
)
|
||||||
|
.map(([param, value]) => (
|
||||||
<TableRow key={param}>
|
<TableRow key={param}>
|
||||||
<TableCell>
|
<TableCell>
|
||||||
<Text>{param}</Text>
|
<Text>{param}</Text>
|
||||||
<p style={{fontSize: '0.65rem', color: '#808080', fontStyle: 'italic'}} className="mt-1">{paramExplanation[param]}</p>
|
<p
|
||||||
|
style={{
|
||||||
|
fontSize: "0.65rem",
|
||||||
|
color: "#808080",
|
||||||
|
fontStyle: "italic",
|
||||||
|
}}
|
||||||
|
className="mt-1"
|
||||||
|
>
|
||||||
|
{paramExplanation[param]}
|
||||||
|
</p>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell>
|
<TableCell>
|
||||||
{
|
{param == "routing_strategy" ? (
|
||||||
param == "routing_strategy" ?
|
<Select2
|
||||||
<Select2 defaultValue={value} className="w-full max-w-md" onValueChange={setSelectedStrategy}>
|
defaultValue={value}
|
||||||
<SelectItem value="usage-based-routing">usage-based-routing</SelectItem>
|
className="w-full max-w-md"
|
||||||
<SelectItem value="latency-based-routing">latency-based-routing</SelectItem>
|
onValueChange={setSelectedStrategy}
|
||||||
<SelectItem value="simple-shuffle">simple-shuffle</SelectItem>
|
>
|
||||||
</Select2> :
|
<SelectItem value="usage-based-routing">
|
||||||
|
usage-based-routing
|
||||||
|
</SelectItem>
|
||||||
|
<SelectItem value="latency-based-routing">
|
||||||
|
latency-based-routing
|
||||||
|
</SelectItem>
|
||||||
|
<SelectItem value="simple-shuffle">
|
||||||
|
simple-shuffle
|
||||||
|
</SelectItem>
|
||||||
|
</Select2>
|
||||||
|
) : (
|
||||||
<TextInput
|
<TextInput
|
||||||
name={param}
|
name={param}
|
||||||
defaultValue={
|
defaultValue={
|
||||||
typeof value === 'object' ? JSON.stringify(value, null, 2) : value.toString()
|
typeof value === "object"
|
||||||
|
? JSON.stringify(value, null, 2)
|
||||||
|
: value.toString()
|
||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
}
|
)}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
</TableRow>
|
</TableRow>
|
||||||
))}
|
))}
|
||||||
|
@ -323,15 +508,21 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
|
||||||
<AccordionHero
|
<AccordionHero
|
||||||
selectedStrategy={selectedStrategy}
|
selectedStrategy={selectedStrategy}
|
||||||
strategyArgs={
|
strategyArgs={
|
||||||
routerSettings && routerSettings['routing_strategy_args'] && Object.keys(routerSettings['routing_strategy_args']).length > 0
|
routerSettings &&
|
||||||
? routerSettings['routing_strategy_args']
|
routerSettings["routing_strategy_args"] &&
|
||||||
|
Object.keys(routerSettings["routing_strategy_args"])
|
||||||
|
.length > 0
|
||||||
|
? routerSettings["routing_strategy_args"]
|
||||||
: defaultLowestLatencyArgs // default value when keys length is 0
|
: defaultLowestLatencyArgs // default value when keys length is 0
|
||||||
}
|
}
|
||||||
paramExplanation={paramExplanation}
|
paramExplanation={paramExplanation}
|
||||||
/>
|
/>
|
||||||
</Card>
|
</Card>
|
||||||
<Col>
|
<Col>
|
||||||
<Button className="mt-2" onClick={() => handleSaveChanges(routerSettings)}>
|
<Button
|
||||||
|
className="mt-2"
|
||||||
|
onClick={() => handleSaveChanges(routerSettings)}
|
||||||
|
>
|
||||||
Save Changes
|
Save Changes
|
||||||
</Button>
|
</Button>
|
||||||
</Col>
|
</Col>
|
||||||
|
@ -347,15 +538,21 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
|
||||||
</TableHead>
|
</TableHead>
|
||||||
|
|
||||||
<TableBody>
|
<TableBody>
|
||||||
{
|
{routerSettings["fallbacks"] &&
|
||||||
routerSettings["fallbacks"] &&
|
routerSettings["fallbacks"].map(
|
||||||
routerSettings["fallbacks"].map((item: Object, index: number) =>
|
(item: Object, index: number) =>
|
||||||
Object.entries(item).map(([key, value]) => (
|
Object.entries(item).map(([key, value]) => (
|
||||||
<TableRow key={index.toString() + key}>
|
<TableRow key={index.toString() + key}>
|
||||||
<TableCell>{key}</TableCell>
|
<TableCell>{key}</TableCell>
|
||||||
<TableCell>{Array.isArray(value) ? value.join(', ') : value}</TableCell>
|
|
||||||
<TableCell>
|
<TableCell>
|
||||||
<Button onClick={() => testFallbackModelResponse(key, accessToken)}>
|
{Array.isArray(value) ? value.join(", ") : value}
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
<Button
|
||||||
|
onClick={() =>
|
||||||
|
testFallbackModelResponse(key, accessToken)
|
||||||
|
}
|
||||||
|
>
|
||||||
Test Fallback
|
Test Fallback
|
||||||
</Button>
|
</Button>
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
@ -368,11 +565,96 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
                          </TableCell>
                        </TableRow>
                      ))
                  )}
                </TableBody>
              </Table>
              <AddFallbacks
                models={
                  modelData?.data
                    ? modelData.data.map((data: any) => data.model_name)
                    : []
                }
                accessToken={accessToken}
                routerSettings={routerSettings}
                setRouterSettings={setRouterSettings}
              />
            </TabPanel>
            <TabPanel>
              <Card>
                <Table>
                  <TableHead>
                    <TableRow>
                      <TableHeaderCell>Setting</TableHeaderCell>
                      <TableHeaderCell>Value</TableHeaderCell>
                      <TableHeaderCell>Status</TableHeaderCell>
                      <TableHeaderCell>Action</TableHeaderCell>
                    </TableRow>
                  </TableHead>
                  <TableBody>
                    {generalSettings.map((value, index) => (
                      <TableRow key={index}>
                        <TableCell>
                          <Text>{value.field_name}</Text>
                          <p
                            style={{
                              fontSize: "0.65rem",
                              color: "#808080",
                              fontStyle: "italic",
                            }}
                            className="mt-1"
                          >
                            {value.field_description}
                          </p>
                        </TableCell>
                        <TableCell>
                          {value.field_type == "Integer" ? (
                            <InputNumber
                              step={1}
                              value={value.field_value}
                              onChange={(newValue) =>
                                handleInputChange(value.field_name, newValue)
                              } // Handle value change
                            />
                          ) : null}
                        </TableCell>
                        <TableCell>
                          {value.stored_in_db == true ? (
                            <Badge icon={CheckCircleIcon} className="text-white">
                              In DB
                            </Badge>
                          ) : value.stored_in_db == false ? (
                            <Badge className="text-gray bg-white outline">
                              In Config
                            </Badge>
                          ) : (
                            <Badge className="text-gray bg-white outline">
                              Not Set
                            </Badge>
                          )}
                        </TableCell>
                        <TableCell>
                          <Button
                            onClick={() =>
                              handleUpdateField(value.field_name, index)
                            }
                          >
                            Update
                          </Button>
                          <Icon
                            icon={TrashIcon}
                            color="red"
                            onClick={() =>
                              handleResetField(value.field_name, index)
                            }
                          >
                            Reset
                          </Icon>
                        </TableCell>
                      </TableRow>
                    ))}
                  </TableBody>
                </Table>
              </Card>
            </TabPanel>
          </TabPanels>
        </TabGroup>
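The new Settings tab above calls handleInputChange, handleUpdateField and handleResetField, whose bodies fall outside this hunk. A plausible sketch of how they could proxy to the config helpers added to networking.ts later in this diff; the generalSettings/setGeneralSettings state names and the exact state updates are assumptions, only the helper signatures come from this diff:

// Sketch only: the real handlers live outside this hunk.
// Assumes React state [generalSettings, setGeneralSettings] holding the rows rendered above,
// plus updateConfigFieldSetting / deleteConfigFieldSetting from networking.ts.
const handleInputChange = (fieldName: string, newValue: any) => {
  // keep the edited value in local state until "Update" is clicked
  const updated = generalSettings.map((setting) =>
    setting.field_name === fieldName
      ? { ...setting, field_value: newValue }
      : setting
  );
  setGeneralSettings(updated);
};

const handleUpdateField = (fieldName: string, idx: number) => {
  if (!accessToken) return;
  const fieldValue = generalSettings[idx].field_value;
  updateConfigFieldSetting(accessToken, fieldName, fieldValue).then(() => {
    // mark the field as persisted so the badge flips to "In DB"
    const updated = [...generalSettings];
    updated[idx] = { ...updated[idx], stored_in_db: true };
    setGeneralSettings(updated);
  });
};

const handleResetField = (fieldName: string, idx: number) => {
  if (!accessToken) return;
  deleteConfigFieldSetting(accessToken, fieldName).then(() => {
    // field no longer stored in the DB; badge falls back to "In Config" / "Not Set"
    const updated = [...generalSettings];
    updated[idx] = { ...updated[idx], stored_in_db: null };
    setGeneralSettings(updated);
  });
};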
@ -14,15 +14,17 @@ export interface Model {

export const modelCostMap = async () => {
  try {
    const response = await fetch(
      "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
    );
    const jsonData = await response.json();
    console.log(`received data: ${jsonData}`);
    return jsonData;
  } catch (error) {
    console.error("Failed to get model cost map:", error);
    throw error;
  }
};

export const modelCreateCall = async (
  accessToken: string,
@ -50,19 +52,21 @@ export const modelCreateCall = async (

    const data = await response.json();
    console.log("API Response:", data);
    message.success(
      "Model created successfully. Wait 60s and refresh on 'All Models' page"
    );
    return data;
  } catch (error) {
    console.error("Failed to create key:", error);
    throw error;
  }
};

export const modelDeleteCall = async (
  accessToken: string,
  model_id: string
) => {
  console.log(`model_id in model delete call: ${model_id}`);
  try {
    const url = proxyBaseUrl ? `${proxyBaseUrl}/model/delete` : `/model/delete`;
    const response = await fetch(url, {
@ -72,7 +76,7 @@ export const modelDeleteCall = async (
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: model_id,
      }),
    });

@ -91,7 +95,7 @@ export const modelDeleteCall = async (
    console.error("Failed to create key:", error);
    throw error;
  }
};

export const keyCreateCall = async (
  accessToken: string,
@ -280,8 +284,7 @@ export const teamDeleteCall = async (accessToken: String, teamID: String) => {
    console.error("Failed to delete key:", error);
    throw error;
  }
};

export const userInfoCall = async (
  accessToken: String,
@ -300,7 +303,7 @@ export const userInfoCall = async (
      url = `${url}?user_id=${userID}`;
    }
    console.log("in userInfoCall viewAll=", viewAll);
    if (viewAll && page_size && page != null && page != undefined) {
      url = `${url}?view_all=true&page=${page}&page_size=${page_size}`;
    }
    //message.info("Requesting user data");
@ -329,10 +332,9 @@ export const userInfoCall = async (
  }
};

export const teamInfoCall = async (
  accessToken: String,
  teamID: String | null
) => {
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/team/info` : `/team/info`;
@ -364,10 +366,7 @@ export const teamInfoCall = async (
  }
};

export const getTotalSpendCall = async (accessToken: String) => {
  /**
   * Get all models on proxy
   */
@ -435,7 +434,6 @@ export const modelInfoCall = async (
  }
};

export const modelMetricsCall = async (
  accessToken: String,
  userID: String,
@ -450,7 +448,7 @@ export const modelMetricsCall = async (
  try {
    let url = proxyBaseUrl ? `${proxyBaseUrl}/model/metrics` : `/model/metrics`;
    if (modelGroup) {
      url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
    }
    // message.info("Requesting model data");
    const response = await fetch(url, {
@ -476,8 +474,6 @@ export const modelMetricsCall = async (
  }
};

export const modelMetricsSlowResponsesCall = async (
  accessToken: String,
  userID: String,
@ -490,9 +486,11 @@ export const modelMetricsSlowResponsesCall = async (
   * Get all models on proxy
   */
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/model/metrics/slow_responses`
      : `/model/metrics/slow_responses`;
    if (modelGroup) {
      url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
    }

    // message.info("Requesting model data");
@ -519,7 +517,6 @@ export const modelMetricsSlowResponsesCall = async (
  }
};

export const modelExceptionsCall = async (
  accessToken: String,
  userID: String,
@ -532,10 +529,12 @@ export const modelExceptionsCall = async (
   * Get all models on proxy
   */
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/model/metrics/exceptions`
      : `/model/metrics/exceptions`;

    if (modelGroup) {
      url = `${url}?_selected_model_group=${modelGroup}&startTime=${startTime}&endTime=${endTime}`;
    }
    const response = await fetch(url, {
      method: "GET",
@ -560,7 +559,6 @@ export const modelExceptionsCall = async (
  }
};

export const modelAvailableCall = async (
  accessToken: String,
  userID: String,
@ -625,7 +623,6 @@ export const keySpendLogsCall = async (accessToken: String, token: String) => {
  }
};

export const teamSpendLogsCall = async (accessToken: String) => {
  try {
    const url = proxyBaseUrl
@ -654,7 +651,6 @@ export const teamSpendLogsCall = async (accessToken: String) => {
  }
};

export const tagsSpendLogsCall = async (
  accessToken: String,
  startTime: String | undefined,
@ -666,7 +662,7 @@ export const tagsSpendLogsCall = async (
      : `/global/spend/tags`;

    if (startTime && endTime) {
      url = `${url}?start_date=${startTime}&end_date=${endTime}`;
    }

    console.log("in tagsSpendLogsCall:", url);
@ -692,7 +688,6 @@ export const tagsSpendLogsCall = async (
  }
};

export const userSpendLogsCall = async (
  accessToken: String,
  token: String,
@ -806,7 +801,11 @@ export const adminTopEndUsersCall = async (

  let body = "";
  if (keyToken) {
    body = JSON.stringify({
      api_key: keyToken,
      startTime: startTime,
      endTime: endTime,
    });
  } else {
    body = JSON.stringify({ startTime: startTime, endTime: endTime });
  }
@ -1079,7 +1078,6 @@ export const teamCreateCall = async (
  }
};

export const keyUpdateCall = async (
  accessToken: string,
  formValues: Record<string, any> // Assuming formValues is an object
@ -1347,9 +1345,10 @@ export const slackBudgetAlertsHealthCheck = async (accessToken: String) => {
  }
};

export const serviceHealthCheck = async (
  accessToken: String,
  service: String
) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/health/services?service=${service}`
@ -1373,7 +1372,9 @@ export const serviceHealthCheck= async (accessToken: String, service: String) =>
    }

    const data = await response.json();
    message.success(
      `Test request to ${service} made - check logs/alerts on ${service} to verify`
    );
    // You can add additional logic here based on the response if needed
    return data;
  } catch (error) {
@ -1382,9 +1383,6 @@ export const serviceHealthCheck= async (accessToken: String, service: String) =>
  }
};

export const getCallbacksCall = async (
  accessToken: String,
  userID: String,
@ -1394,7 +1392,9 @@ export const getCallbacksCall = async (
   * Get all the models user has access to
   */
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/get/config/callbacks`
      : `/get/config/callbacks`;

    //message.info("Requesting model data");
    const response = await fetch(url, {
@ -1421,11 +1421,117 @@ export const getCallbacksCall = async (
  }
};

export const getGeneralSettingsCall = async (accessToken: String) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/config/list?config_type=general_settings`
      : `/config/list?config_type=general_settings`;

    //message.info("Requesting model data");
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 10);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    //message.info("Received model data");
    return data;
    // Handle success - you might want to update some state or UI based on the created key
  } catch (error) {
    console.error("Failed to get callbacks:", error);
    throw error;
  }
};

export const updateConfigFieldSetting = async (
  accessToken: String,
  fieldName: string,
  fieldValue: any
) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/config/field/update`
      : `/config/field/update`;

    let formData = {
      field_name: fieldName,
      field_value: fieldValue,
      config_type: "general_settings",
    };
    //message.info("Requesting model data");
    const response = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(formData),
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 10);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    //message.info("Received model data");
    message.success("Successfully updated value!");
    return data;
    // Handle success - you might want to update some state or UI based on the created key
  } catch (error) {
    console.error("Failed to set callbacks:", error);
    throw error;
  }
};

export const deleteConfigFieldSetting = async (
  accessToken: String,
  fieldName: String
) => {
  try {
    let url = proxyBaseUrl
      ? `${proxyBaseUrl}/config/field/delete`
      : `/config/field/delete`;

    let formData = {
      field_name: fieldName,
      config_type: "general_settings",
    };
    //message.info("Requesting model data");
    const response = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${accessToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(formData),
    });

    if (!response.ok) {
      const errorData = await response.text();
      message.error(errorData, 10);
      throw new Error("Network response was not ok");
    }

    const data = await response.json();
    message.success("Field reset on proxy");
    return data;
    // Handle success - you might want to update some state or UI based on the created key
  } catch (error) {
    console.error("Failed to get callbacks:", error);
    throw error;
  }
};

export const setCallbacksCall = async (
  accessToken: String,
  formValues: Record<string, any>
@ -1464,9 +1570,7 @@ export const setCallbacksCall = async (
  }
};

export const healthCheckCall = async (accessToken: String) => {
  /**
   * Get all the models user has access to
   */
@ -1497,6 +1601,3 @@ export const healthCheckCall = async (
    throw error;
  }
};
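For reference, a minimal sketch of how a dashboard component might exercise the new general-settings helpers introduced in this diff. Only the three function signatures come from the diff; the import path, the placeholder key, and the example field name max_parallel_requests are assumptions:

// Hypothetical usage sketch (not part of this diff).
// Assumes the helpers are exported from the networking module shown above.
import {
  getGeneralSettingsCall,
  updateConfigFieldSetting,
  deleteConfigFieldSetting,
} from "./networking";

const accessToken = "sk-..."; // placeholder proxy admin key

async function demoGeneralSettingsFlow() {
  // Fetch the current general_settings fields (field_name, field_value, stored_in_db, ...)
  const settings = await getGeneralSettingsCall(accessToken);
  console.log("general settings:", settings);

  // Persist a single field to the DB-backed config
  await updateConfigFieldSetting(accessToken, "max_parallel_requests", 100);

  // Reset the same field so the proxy falls back to its config/default value
  await deleteConfigFieldSetting(accessToken, "max_parallel_requests");
}

demoGeneralSettingsFlow().catch(console.error);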