From 9d3f01c6ae23e63f20ff1f066b5ca12a9a29d5e1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 12:32:16 -0700 Subject: [PATCH 1/9] fix - router add model logic --- litellm/router.py | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 39d49a147..5e71fea58 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2558,20 +2558,20 @@ class Router: self.set_client(model=deployment.to_json(exclude_none=True)) # set region (if azure model) - try: - if "azure" in deployment.litellm_params.model: - region = litellm.utils.get_model_region( - litellm_params=deployment.litellm_params, mode=None - ) + # try: + # if "azure" in deployment.litellm_params.model: + # region = litellm.utils.get_model_region( + # litellm_params=deployment.litellm_params, mode=None + # ) - deployment.litellm_params.region_name = region - except Exception as e: - verbose_router_logger.error( - "Unable to get the region for azure model - {}, {}".format( - deployment.litellm_params.model, str(e) - ) - ) - pass # [NON-BLOCKING] + # deployment.litellm_params.region_name = region + # except Exception as e: + # verbose_router_logger.error( + # "Unable to get the region for azure model - {}, {}".format( + # deployment.litellm_params.model, str(e) + # ) + # ) + # pass # [NON-BLOCKING] return deployment @@ -2610,7 +2610,6 @@ class Router: - The added/updated deployment """ # check if deployment already exists - if deployment.model_info.id in self.get_model_ids(): # remove the previous deployment removal_idx: Optional[int] = None @@ -2620,16 +2619,9 @@ class Router: if removal_idx is not None: self.model_list.pop(removal_idx) - - # add to model list - _deployment = deployment.to_json(exclude_none=True) - self.model_list.append(_deployment) - - # initialize client - self._add_deployment(deployment=deployment) - - # add to model names - self.model_names.append(deployment.model_name) + else: + # if the model_id is not in router + self.add_deployment(deployment=deployment) return deployment def delete_deployment(self, id: str) -> Optional[Deployment]: From 6fd6490d6304043d9687723fb495a67e2749ac36 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 12:38:06 -0700 Subject: [PATCH 2/9] fix hide - _auto_infer_region behind a feature flag --- litellm/router.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 5e71fea58..3c777e2f0 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2558,20 +2558,23 @@ class Router: self.set_client(model=deployment.to_json(exclude_none=True)) # set region (if azure model) - # try: - # if "azure" in deployment.litellm_params.model: - # region = litellm.utils.get_model_region( - # litellm_params=deployment.litellm_params, mode=None - # ) + _auto_infer_region = os.environ.get("AUTO_INFER_REGION", "true") + _auto_infer_region_value = bool(_auto_infer_region) + if _auto_infer_region_value == True: + try: + if "azure" in deployment.litellm_params.model: + region = litellm.utils.get_model_region( + litellm_params=deployment.litellm_params, mode=None + ) - # deployment.litellm_params.region_name = region - # except Exception as e: - # verbose_router_logger.error( - # "Unable to get the region for azure model - {}, {}".format( - # deployment.litellm_params.model, str(e) - # ) - # ) - # pass # [NON-BLOCKING] + deployment.litellm_params.region_name = region + except Exception as e: + verbose_router_logger.error( + "Unable to get the region for azure model - {}, {}".format( + deployment.litellm_params.model, str(e) + ) + ) + pass # [NON-BLOCKING] return deployment From 75d6658bbcb54e9a9d73f38ece0a0f9d7be81866 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 12:39:19 -0700 Subject: [PATCH 3/9] fix - explain why behind feature flag --- litellm/router.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/litellm/router.py b/litellm/router.py index 3c777e2f0..ac9b4cb47 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2561,6 +2561,10 @@ class Router: _auto_infer_region = os.environ.get("AUTO_INFER_REGION", "true") _auto_infer_region_value = bool(_auto_infer_region) if _auto_infer_region_value == True: + """ + Hiding behind a feature flag + When there is a large amount of LLM deployments this makes startup times blow up + """ try: if "azure" in deployment.litellm_params.model: region = litellm.utils.get_model_region( From 547976448f094af5875285cac400e3872446ee6a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 12:50:46 -0700 Subject: [PATCH 4/9] fix feature flag logic --- litellm/router.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index ac9b4cb47..d833c1a85 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2558,9 +2558,8 @@ class Router: self.set_client(model=deployment.to_json(exclude_none=True)) # set region (if azure model) - _auto_infer_region = os.environ.get("AUTO_INFER_REGION", "true") - _auto_infer_region_value = bool(_auto_infer_region) - if _auto_infer_region_value == True: + _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False) + if _auto_infer_region == True: """ Hiding behind a feature flag When there is a large amount of LLM deployments this makes startup times blow up From 5c69515a1347751f25e99671a1260cdc96108be8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 13:41:51 -0700 Subject: [PATCH 5/9] fix - upsert_deployment logic --- litellm/router.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index d833c1a85..20566f255 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2606,7 +2606,7 @@ class Router: self.model_names.append(deployment.model_name) return deployment - def upsert_deployment(self, deployment: Deployment) -> Deployment: + def upsert_deployment(self, deployment: Deployment) -> Deployment | None: """ Add or update deployment Parameters: @@ -2616,7 +2616,17 @@ class Router: - The added/updated deployment """ # check if deployment already exists - if deployment.model_info.id in self.get_model_ids(): + _deployment_model_id = deployment.model_info.id or "" + _deployment_on_router: Optional[Deployment] = self.get_deployment( + model_id=_deployment_model_id + ) + if _deployment_on_router is not None: + # deployment with this model_id exists on the router + if deployment.litellm_params == _deployment_on_router.litellm_params: + # No need to update + return None + + # if there is a new litellm param -> then update the deployment # remove the previous deployment removal_idx: Optional[int] = None for idx, model in enumerate(self.model_list): From 9bbb13c373313107a21d62e6e051d65b0afce91b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 13:54:52 -0700 Subject: [PATCH 6/9] fix bug upsert_deployment --- litellm/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index 20566f255..32c2b61d1 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2606,7 +2606,7 @@ class Router: self.model_names.append(deployment.model_name) return deployment - def upsert_deployment(self, deployment: Deployment) -> Deployment | None: + def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]: """ Add or update deployment Parameters: From c744851d139265932123e81447261a174c14b47d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 14:08:38 -0700 Subject: [PATCH 7/9] fix AUTO_INFER_REGION --- .circleci/config.yml | 1 + litellm/router.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1ef8c0e33..08bbad6fa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -198,6 +198,7 @@ jobs: -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_REGION_NAME=$AWS_REGION_NAME \ + -e AUTO_INFER_REGION="True" \ -e OPENAI_API_KEY=$OPENAI_API_KEY \ -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \ -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \ diff --git a/litellm/router.py b/litellm/router.py index 32c2b61d1..e8b0f658f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2559,7 +2559,7 @@ class Router: # set region (if azure model) _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False) - if _auto_infer_region == True: + if _auto_infer_region == True or _auto_infer_region == "True": """ Hiding behind a feature flag When there is a large amount of LLM deployments this makes startup times blow up From 7d96272d52e45969716763d26185feac90852ceb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 16:08:05 -0700 Subject: [PATCH 8/9] fix auto inferring region --- .circleci/config.yml | 2 +- litellm/router.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 08bbad6fa..35707dbff 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -198,7 +198,7 @@ jobs: -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ -e AWS_REGION_NAME=$AWS_REGION_NAME \ - -e AUTO_INFER_REGION="True" \ + -e AUTO_INFER_REGION=True \ -e OPENAI_API_KEY=$OPENAI_API_KEY \ -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \ -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \ diff --git a/litellm/router.py b/litellm/router.py index e8b0f658f..ba777a44d 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2560,6 +2560,7 @@ class Router: # set region (if azure model) _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False) if _auto_infer_region == True or _auto_infer_region == "True": + print("Auto inferring region") # noqa """ Hiding behind a feature flag When there is a large amount of LLM deployments this makes startup times blow up From 1a8e85381757cf37cf7b8459e9cfd381b9a6c759 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 10 May 2024 16:19:03 -0700 Subject: [PATCH 9/9] (ci/cd) run again --- litellm/tests/test_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index f726ed95a..d2c1b0b15 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1142,7 +1142,7 @@ def test_get_hf_task_for_model(): # test_get_hf_task_for_model() -# litellm.set_verbose=False +# litellm.set_verbose = False # ################### Hugging Face TGI models ######################## # # TGI model # # this is a TGI model https://huggingface.co/glaiveai/glaive-coder-7b