fix(router.py): fix latency based routing

2025-04-25 10:44:24 +00:00 · 2023-12-30 17:25:40 +05:30 · 2023-12-30 17:25:40 +05:30 · f2d0d5584a
commit f2d0d5584a
parent c41b1418d4
4 changed files with 443 additions and 14 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -19,6 +19,7 @@ from openai import AsyncOpenAI
 from collections import defaultdict
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
+from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
 from litellm.llms.custom_httpx.azure_dall_e_2 import (
    CustomHTTPTransport,
    AsyncCustomHTTPTransport,
@ -211,7 +212,12 @@ class Router:
            )
            if isinstance(litellm.callbacks, list):
                litellm.callbacks.append(self.lowesttpm_logger)  # type: ignore
-
+        elif routing_strategy == "latency-based-routing":
+            self.lowestlatency_logger = LowestLatencyLoggingHandler(
+                router_cache=self.cache, model_list=self.model_list
+            )
+            if isinstance(litellm.callbacks, list):
+                litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
        ## COOLDOWNS ##
        if isinstance(litellm.failure_callback, list):
            litellm.failure_callback.append(self.deployment_callback_on_failure)
@ -1733,18 +1739,16 @@ class Router:
            ############## No RPM/TPM passed, we do a random pick #################
            item = random.choice(healthy_deployments)
            return item or item[0]
-        elif self.routing_strategy == "latency-based-routing":
-            returned_item = None
-            lowest_latency = float("inf")
-            ### shuffles with priority for lowest latency
-            # items_with_latencies = [('A', 10), ('B', 20), ('C', 30), ('D', 40)]
-            items_with_latencies = []
-            for item in healthy_deployments:
-                items_with_latencies.append(
-                    (item, self.deployment_latency_map[item["litellm_params"]["model"]])
-                )
-            returned_item = self.weighted_shuffle_by_latency(items_with_latencies)
-            return returned_item
+        elif (
+            self.routing_strategy == "latency-based-routing"
+            and self.lowestlatency_logger is not None
+        ):
+            min_deployment = self.lowestlatency_logger.get_available_deployments(
+                model_group=model, healthy_deployments=healthy_deployments
+            )
+            if min_deployment is None:
+                min_deployment = random.choice(healthy_deployments)
+            return min_deployment
        elif (
            self.routing_strategy == "usage-based-routing"
            and self.lowesttpm_logger is not None