diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index 9060c956d7..b049c94264 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -124,9 +124,7 @@ class LowestLatencyLoggingHandler(CustomLogger): len(request_count_dict[id].get("latency", [])) < self.routing_args.max_latency_list_size ): - latency_value = final_value.total_seconds() if isinstance(final_value, timedelta) else final_value - request_count_dict[id].setdefault("latency", []).append(latency_value) - + request_count_dict[id].setdefault("latency", []).append(final_value) else: request_count_dict[id]["latency"] = request_count_dict[id][ "latency" @@ -334,9 +332,7 @@ class LowestLatencyLoggingHandler(CustomLogger): len(request_count_dict[id].get("latency", [])) < self.routing_args.max_latency_list_size ): - latency_value = final_value.total_seconds() if isinstance(final_value, timedelta) else final_value - request_count_dict[id].setdefault("latency", []).append(latency_value) - + request_count_dict[id].setdefault("latency", []).append(final_value) else: request_count_dict[id]["latency"] = request_count_dict[id][ "latency" @@ -476,15 +472,11 @@ class LowestLatencyLoggingHandler(CustomLogger): for _call_latency in item_ttft_latency: if isinstance(_call_latency, float): total += _call_latency - total = total / len(item_ttft_latency) if total > 0 else float('inf') - elif len(item_latency) > 0 and isinstance(item_latency[0], timedelta): - for _call_latency in item_latency: - total += _call_latency.total_seconds() else: for _call_latency in item_latency: if isinstance(_call_latency, float): total += _call_latency - item_latency = total / len(item_latency) if total > 0 else float('inf') + item_latency = total / len(item_latency) # -------------- # # Debugging Logic @@ -511,9 +503,6 @@ class LowestLatencyLoggingHandler(CustomLogger): if len(potential_deployments) == 0: return None - if not healthy_deployments: - return None - # Sort potential deployments by latency sorted_deployments = sorted(potential_deployments, key=lambda x: x[1]) @@ -523,10 +512,9 @@ class LowestLatencyLoggingHandler(CustomLogger): # Find deployments within buffer of lowest latency buffer = self.routing_args.lowest_latency_buffer * lowest_latency - # If no deployments within buffer, fall back to all sorted deployments valid_deployments = [ - x for x in sorted_deployments if x[1] <= lowest_latency + buffer - ] or sorted_deployments + x for x in sorted_deployments if x[1] <= lowest_latency + buffer + ] # Pick a random deployment from valid deployments random_valid_deployment = random.choice(valid_deployments)