From 654c736d29b78d4f6c68d1d099c83e9075841cff Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 24 Apr 2024 16:35:00 -0700
Subject: [PATCH] feat - penalize timeout errors

---
 litellm/router_strategy/lowest_latency.py | 52 +++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py
index 387f8c210b..96a32372d9 100644
--- a/litellm/router_strategy/lowest_latency.py
+++ b/litellm/router_strategy/lowest_latency.py
@@ -11,6 +11,7 @@ from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm import ModelResponse
 from litellm import token_counter
+import litellm
 
 
 class LiteLLMBase(BaseModel):
@@ -126,6 +127,57 @@ class LowestLatencyLoggingHandler(CustomLogger):
             traceback.print_exc()
             pass
 
+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        """
+        Penalize a deployment whose request failed with a timeout.
+
+        If kwargs["exception"] is a litellm.Timeout, append a fixed latency
+        sample of 100.0 to that deployment's "latency" list in the router cache;
+        every other failure is ignored.
+
+        Cache layout:
+        {
+            {model_group}_map: {
+                id: {
+                    "latency": [..]
+                    f"{date:hour:minute}" : {"tpm": 34, "rpm": 3}
+                }
+            }
+        }
+        """
+        try:
+            if not isinstance(kwargs.get("exception", None), litellm.Timeout):
+                # do nothing if it's not a timeout error
+                return
+
+            litellm_params = kwargs["litellm_params"]
+            metadata = litellm_params.get("metadata")
+            if metadata is None:
+                return
+
+            model_group = metadata.get("model_group", None)
+            # `or {}` also covers model_info being explicitly None (a .get default would not)
+            deployment_id = (litellm_params.get("model_info") or {}).get("id", None)
+            if model_group is None or deployment_id is None:
+                return
+            if isinstance(deployment_id, int):
+                deployment_id = str(deployment_id)
+
+            latency_key = f"{model_group}_map"
+            request_count_dict = self.router_cache.get_cache(key=latency_key) or {}
+
+            ## Latency
+            deployment_stats = request_count_dict.setdefault(deployment_id, {})
+            deployment_stats.setdefault("latency", []).append(100.0)
+            self.router_cache.set_cache(
+                key=latency_key,
+                value=request_count_dict,
+                ttl=self.routing_args.ttl,
+            )  # reset map within window
+        except Exception:
+            # best-effort hook: report the error but never let it propagate
+            traceback.print_exc()
+
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         try:
             """