build: merge fix

2025-04-24 18:24:20 +00:00 · 2025-03-19 20:27:15 -05:00 · 2025-03-19 20:27:15 -05:00 · 4082138edd
commit 4082138edd
parent 9dc8fb982d
1 changed file with 23 additions and 9 deletions
--- a/litellm/router_strategy/lowest_latency.py
+++ b/litellm/router_strategy/lowest_latency.py
@@ -124,9 +124,14 @@ class LowestLatencyLoggingHandler(CustomLogger):
                    len(request_count_dict[id].get("latency", []))
                    < self.routing_args.max_latency_list_size
                ):
-                    latency_value = final_value.total_seconds() if isinstance(final_value, timedelta) else final_value
-                    request_count_dict[id].setdefault("latency", []).append(latency_value)
-
+                    latency_value = (
+                        final_value.total_seconds()
+                        if isinstance(final_value, timedelta)
+                        else final_value
+                    )
+                    request_count_dict[id].setdefault("latency", []).append(
+                        latency_value
+                    )
                else:
                    request_count_dict[id]["latency"] = request_count_dict[id][
                        "latency"
@@ -334,9 +339,14 @@ class LowestLatencyLoggingHandler(CustomLogger):
                    len(request_count_dict[id].get("latency", []))
                    < self.routing_args.max_latency_list_size
                ):
-                    latency_value = final_value.total_seconds() if isinstance(final_value, timedelta) else final_value
-                    request_count_dict[id].setdefault("latency", []).append(latency_value)
-
+                    latency_value = (
+                        final_value.total_seconds()
+                        if isinstance(final_value, timedelta)
+                        else final_value
+                    )
+                    request_count_dict[id].setdefault("latency", []).append(
+                        latency_value
+                    )
                else:
                    request_count_dict[id]["latency"] = request_count_dict[id][
                        "latency"
@@ -476,7 +486,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
                for _call_latency in item_ttft_latency:
                    if isinstance(_call_latency, float):
                        total += _call_latency
-                total = total / len(item_ttft_latency) if total > 0 else float('inf')
+                total = total / len(item_ttft_latency) if total > 0 else float("inf")
            elif len(item_latency) > 0 and isinstance(item_latency[0], timedelta):
                for _call_latency in item_latency:
                    total += _call_latency.total_seconds()
@@ -484,7 +494,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
                for _call_latency in item_latency:
                    if isinstance(_call_latency, float):
                        total += _call_latency
-            item_latency = total / len(item_latency) if total > 0 else float('inf')
+            item_latency = total / len(item_latency) if total > 0 else float("inf")

            # -------------- #
            # Debugging Logic
@@ -511,6 +521,9 @@ class LowestLatencyLoggingHandler(CustomLogger):
        if len(potential_deployments) == 0:
            return None

+        if not healthy_deployments:
+            return None
+
        # Sort potential deployments by latency
        sorted_deployments = sorted(potential_deployments, key=lambda x: x[1])

@@ -520,9 +533,10 @@ class LowestLatencyLoggingHandler(CustomLogger):
        # Find deployments within buffer of lowest latency
        buffer = self.routing_args.lowest_latency_buffer * lowest_latency

+        # If no deployments within buffer, fall back to all sorted deployments
        valid_deployments = [
            x for x in sorted_deployments if x[1] <= lowest_latency + buffer
-        ]
+        ] or sorted_deployments

        # Pick a random deployment from valid deployments
        random_valid_deployment = random.choice(valid_deployments)