diff --git a/litellm/__init__.py b/litellm/__init__.py
index aedf42139..cd4aa5144 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -101,6 +101,9 @@ blocked_user_list: Optional[Union[str, List]] = None
 banned_keywords_list: Optional[Union[str, List]] = None
 llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all"
 ##################
+### PREVIEW FEATURES ###
+enable_preview_features: bool = False  # opt-in flag, off by default; Router reads it before auto-inferring an azure deployment's region
+##################
 logging: bool = True
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 752cd281d..832e35113 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -1,25 +1,13 @@
 model_list:
 - litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key
-    model: openai/my-fake-model
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-2
-    model: openai/my-fake-model-2
-  model_name: fake-openai-endpoint
-- litellm_params:
-    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
-    api_key: my-fake-key-3
-    model: openai/my-fake-model-3
-  model_name: fake-openai-endpoint
-- model_name: gpt-4
-  litellm_params:
-    model: gpt-3.5-turbo
-- litellm_params:
-    model: together_ai/codellama/CodeLlama-13b-Instruct-hf
-  model_name: CodeLlama-13b-Instruct
+    api_base: os.environ/AZURE_API_BASE
+    api_key: os.environ/AZURE_API_KEY
+    api_version: 2023-07-01-preview
+    model: azure/azure-embedding-model
+  model_info:
+    base_model: text-embedding-ada-002
+    mode: embedding
+  model_name: text-embedding-ada-002
 
 router_settings:
   redis_host: redis
@@ -28,6 +16,7 @@ router_settings:
 
 litellm_settings:
   set_verbose: True
+  enable_preview_features: true
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]
diff --git a/litellm/router.py b/litellm/router.py
index f0d94908e..e0abc2e3b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2557,23 +2557,25 @@ class Router:
         # init OpenAI, Azure clients
         self.set_client(model=deployment.to_json(exclude_none=True))
 
-        # set region (if azure model)
-        _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False)
-        if _auto_infer_region == True or _auto_infer_region == "True":
+        # set region (azure models with no region_name set) ## PREVIEW FEATURE ##
+        if litellm.enable_preview_features == True:
             print("Auto inferring region")  # noqa
             """
             Hiding behind a feature flag
             When there is a large amount of LLM deployments this makes startup times blow up
             """
             try:
-                if "azure" in deployment.litellm_params.model:
+                if (
+                    "azure" in deployment.litellm_params.model
+                    and deployment.litellm_params.region_name is None
+                ):
                     region = litellm.utils.get_model_region(
                         litellm_params=deployment.litellm_params, mode=None
                     )
 
                     deployment.litellm_params.region_name = region
             except Exception as e:
-                verbose_router_logger.error(
+                verbose_router_logger.debug(
                     "Unable to get the region for azure model - {}, {}".format(
                         deployment.litellm_params.model, str(e)
                     )
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 2d277d749..7c59acb79 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -687,6 +687,55 @@ def test_router_context_window_check_pre_call_check_out_group():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
+@pytest.mark.parametrize("allowed_model_region", ["eu", None])
+def test_router_region_pre_call_check(allowed_model_region):
+    """
+    Call `Router._pre_call_checks` with `allowed_model_region` set to "eu" and to None.
+    - "eu": expect exactly one deployment back (id "1"); None: expect both deployments back.
+    """
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",  # same name is passed as model= to _pre_call_checks below
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE"),
+                "base_model": "azure/gpt-35-turbo",
+                "region_name": "eu",  # the only deployment here with an explicit region
+            },
+            "model_info": {"id": "1"},
+        },
+        {
+            "model_name": "gpt-3.5-turbo-large",  # different model_name; no region_name on this deployment
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "gpt-3.5-turbo-1106",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "model_info": {"id": "2"},
+        },
+    ]
+
+    router = Router(model_list=model_list, enable_pre_call_checks=True)
+
+    _healthy_deployments = router._pre_call_checks(
+        model="gpt-3.5-turbo",
+        healthy_deployments=model_list,  # both raw deployment dicts are handed in
+        messages=[{"role": "user", "content": "Hey!"}],
+        allowed_model_region=allowed_model_region,
+    )
+
+    if allowed_model_region is None:
+        assert len(_healthy_deployments) == 2  # no region restriction: both deployments expected back
+    else:
+        assert len(_healthy_deployments) == 1, "No models selected as healthy"  # NOTE(review): message also fires when more than one is returned
+        assert (
+            _healthy_deployments[0]["model_info"]["id"] == "1"
+        ), "Incorrect model id picked. Got id={}, expected id=1".format(
+            _healthy_deployments[0]["model_info"]["id"]
+        )
+
+
 ### FUNCTION CALLING
 
 
diff --git a/litellm/utils.py b/litellm/utils.py
index 9218f92a3..1c9c3df92 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5881,6 +5881,9 @@ def calculate_max_parallel_requests(
 
 
 def _is_region_eu(model_region: str) -> bool:
+    if model_region == "eu":
+        return True
+
     EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
     for region in EU_Regions:
         if "europe" in model_region.lower():