fix(router.py): handle 1 deployment being picked

2024-04-10 18:32:54 -07:00 · 2024-04-10 18:32:54 -07:00 · 266dba65e7
commit 266dba65e7
parent 52462e8bac
1 changed files with 21 additions and 24 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -2434,8 +2434,22 @@ class Router:
    ):
        """
        Common checks for 'get_available_deployment' across sync + async call.
+
+        If the returned 'healthy_deployments' is None, the first returned value is already the chosen deployment (the user requested a specific deployment, or the default deployment was used)
        """
        # check if aliases set on litellm model alias map
+        if specific_deployment == True:
+            # users can also specify a specific deployment name. At this point we should check if they are just trying to call a specific deployment
+            for deployment in self.model_list:
+                deployment_model = deployment.get("litellm_params").get("model")
+                if deployment_model == model:
+                    # User Passed a specific deployment name on their config.yaml, example azure/chat-gpt-v-2
+                    # return the first deployment where the `model` matches the specified deployment name
+                    return deployment, None
+            raise ValueError(
+                f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
+            )
+
        if model in self.model_group_alias:
            verbose_router_logger.debug(
                f"Using a model alias. Got Request for {model}, sending requests to {self.model_group_alias.get(model)}"
@ -2447,7 +2461,7 @@ class Router:
                self.default_deployment
            )  # self.default_deployment
            updated_deployment["litellm_params"]["model"] = model
-            return updated_deployment
+            return updated_deployment, None

        ## get healthy deployments
        ### get all deployments
@ -2496,18 +2510,6 @@ class Router:
                specific_deployment=specific_deployment,
            )

-        if specific_deployment == True:
-            # users can also specify a specific deployment name. At this point we should check if they are just trying to call a specific deployment
-            for deployment in self.model_list:
-                deployment_model = deployment.get("litellm_params").get("model")
-                if deployment_model == model:
-                    # User Passed a specific deployment name on their config.yaml, example azure/chat-gpt-v-2
-                    # return the first deployment where the `model` matches the specificed deployment name
-                    return deployment
-            raise ValueError(
-                f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
-            )
-
        model, healthy_deployments = self._common_checks_available_deployment(
            model=model,
            messages=messages,
@ -2515,6 +2517,9 @@ class Router:
            specific_deployment=specific_deployment,
        )

+        if healthy_deployments is None:
+            return model
+
        # filter out the deployments currently cooling down
        deployments_to_remove = []
        # cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]
@ -2572,17 +2577,6 @@ class Router:
        """
        # users need to explicitly call a specific deployment, by setting `specific_deployment = True` as completion()/embedding() kwarg
        # When this was no explicit we had several issues with fallbacks timing out
-        if specific_deployment == True:
-            # users can also specify a specific deployment name. At this point we should check if they are just trying to call a specific deployment
-            for deployment in self.model_list:
-                deployment_model = deployment.get("litellm_params").get("model")
-                if deployment_model == model:
-                    # User Passed a specific deployment name on their config.yaml, example azure/chat-gpt-v-2
-                    # return the first deployment where the `model` matches the specificed deployment name
-                    return deployment
-            raise ValueError(
-                f"LiteLLM Router: Trying to call specific deployment, but Model:{model} does not exist in Model List: {self.model_list}"
-            )

        model, healthy_deployments = self._common_checks_available_deployment(
            model=model,
@ -2591,6 +2585,9 @@ class Router:
            specific_deployment=specific_deployment,
        )

+        if healthy_deployments is None:
+            return model
+
        # filter out the deployments currently cooling down
        deployments_to_remove = []
        # cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]