diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1ef8c0e33..35707dbff 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -198,6 +198,7 @@ jobs:
             -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
             -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
             -e AWS_REGION_NAME=$AWS_REGION_NAME \
+            -e AUTO_INFER_REGION=True \
             -e OPENAI_API_KEY=$OPENAI_API_KEY \
             -e LANGFUSE_PROJECT1_PUBLIC=$LANGFUSE_PROJECT1_PUBLIC \
             -e LANGFUSE_PROJECT2_PUBLIC=$LANGFUSE_PROJECT2_PUBLIC \
diff --git a/litellm/router.py b/litellm/router.py
index 39d49a147..ba777a44d 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2558,20 +2558,27 @@ class Router:
         self.set_client(model=deployment.to_json(exclude_none=True))

         # set region (if azure model)
-        try:
-            if "azure" in deployment.litellm_params.model:
-                region = litellm.utils.get_model_region(
-                    litellm_params=deployment.litellm_params, mode=None
-                )
+        _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
+        if _auto_infer_region == True or _auto_infer_region == "True":
+            print("Auto inferring region")  # noqa
+            """
+            Hiding behind a feature flag
+            When there is a large amount of LLM deployments this makes startup times blow up
+            """
+            try:
+                if "azure" in deployment.litellm_params.model:
+                    region = litellm.utils.get_model_region(
+                        litellm_params=deployment.litellm_params, mode=None
+                    )

-                deployment.litellm_params.region_name = region
-        except Exception as e:
-            verbose_router_logger.error(
-                "Unable to get the region for azure model - {}, {}".format(
-                    deployment.litellm_params.model, str(e)
+                    deployment.litellm_params.region_name = region
+            except Exception as e:
+                verbose_router_logger.error(
+                    "Unable to get the region for azure model - {}, {}".format(
+                        deployment.litellm_params.model, str(e)
+                    )
                 )
-            )
-            pass  # [NON-BLOCKING]
+                pass  # [NON-BLOCKING]

         return deployment

@@ -2600,7 +2607,7 @@ class Router:
         self.model_names.append(deployment.model_name)
         return deployment

-    def upsert_deployment(self, deployment: Deployment) -> Deployment:
+    def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
         """
         Add or update deployment
         Parameters:
@@ -2610,8 +2617,17 @@ class Router:
         - The added/updated deployment
         """
         # check if deployment already exists
+        _deployment_model_id = deployment.model_info.id or ""
+        _deployment_on_router: Optional[Deployment] = self.get_deployment(
+            model_id=_deployment_model_id
+        )
+        if _deployment_on_router is not None:
+            # deployment with this model_id exists on the router
+            if deployment.litellm_params == _deployment_on_router.litellm_params:
+                # No need to update
+                return None

-        if deployment.model_info.id in self.get_model_ids():
+            # if there is a new litellm param -> then update the deployment
             # remove the previous deployment
             removal_idx: Optional[int] = None
             for idx, model in enumerate(self.model_list):
@@ -2620,16 +2636,9 @@ class Router:

             if removal_idx is not None:
                 self.model_list.pop(removal_idx)
-
-            # add to model list
-            _deployment = deployment.to_json(exclude_none=True)
-            self.model_list.append(_deployment)
-
-            # initialize client
-            self._add_deployment(deployment=deployment)
-
-            # add to model names
-            self.model_names.append(deployment.model_name)
+        else:
+            # if the model_id is not in router
+            self.add_deployment(deployment=deployment)
         return deployment

     def delete_deployment(self, id: str) -> Optional[Deployment]:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 630baf346..bb47404dc 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1141,7 +1141,7 @@ def test_get_hf_task_for_model():


 # test_get_hf_task_for_model()
-# litellm.set_verbose=False
+# litellm.set_verbose = False
 # ################### Hugging Face TGI models ########################
 # # TGI model
 # # this is a TGI model https://huggingface.co/glaiveai/glaive-coder-7b
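
A minimal sketch (not part of the patch) of how the two behavior changes are meant to be exercised together. It assumes the `Deployment` and `LiteLLM_Params` types exported from `litellm.types.router`; the model name, API key, and endpoint below are placeholder assumptions:

```python
import os

from litellm import Router
from litellm.types.router import Deployment, LiteLLM_Params

# Opt in to region inference. When the flag is unset (the default), the
# router now skips the per-deployment get_model_region() lookup, which
# keeps startup fast for large model lists.
os.environ["AUTO_INFER_REGION"] = "True"

router = Router(model_list=[])

dep = Deployment(
    model_name="gpt-3.5-turbo",
    litellm_params=LiteLLM_Params(
        model="azure/chatgpt-v-2",  # placeholder deployment name
        api_key="sk-placeholder",  # placeholder credential
        api_base="https://my-endpoint.openai.azure.com",  # placeholder endpoint
    ),
)
router.add_deployment(deployment=dep)

# Re-upserting a deployment whose litellm_params are unchanged is now a
# no-op: upsert_deployment() returns None instead of removing the existing
# entry and re-initializing its client.
assert router.upsert_deployment(deployment=dep) is None
```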