From ebc927f1c8b550ebc888dced3b9aebd3d0572a42 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 11 May 2024 10:18:08 -0700 Subject: [PATCH 1/3] feat(router.py): allow setting model_region in litellm_params Closes https://github.com/BerriAI/litellm/issues/3580 --- litellm/__init__.py | 3 ++ litellm/proxy/_super_secret_config.yaml | 29 +++++---------- litellm/router.py | 12 +++--- litellm/tests/test_router.py | 49 +++++++++++++++++++++++++ litellm/utils.py | 3 ++ 5 files changed, 71 insertions(+), 25 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index aedf42139..cd4aa5144 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -101,6 +101,9 @@ blocked_user_list: Optional[Union[str, List]] = None banned_keywords_list: Optional[Union[str, List]] = None llm_guard_mode: Literal["all", "key-specific", "request-specific"] = "all" ################## +### PREVIEW FEATURES ### +enable_preview_features: bool = False +################## logging: bool = True caching: bool = ( False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 752cd281d..832e35113 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -1,25 +1,13 @@ model_list: - litellm_params: - api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ - api_key: my-fake-key - model: openai/my-fake-model - model_name: fake-openai-endpoint -- litellm_params: - api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ - api_key: my-fake-key-2 - model: openai/my-fake-model-2 - model_name: fake-openai-endpoint -- litellm_params: - api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ - api_key: my-fake-key-3 - model: openai/my-fake-model-3 - model_name: fake-openai-endpoint -- model_name: gpt-4 - litellm_params: - model: gpt-3.5-turbo -- litellm_params: - model: together_ai/codellama/CodeLlama-13b-Instruct-hf - model_name: CodeLlama-13b-Instruct + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: 2023-07-01-preview + model: azure/azure-embedding-model + model_info: + base_model: text-embedding-ada-002 + mode: embedding + model_name: text-embedding-ada-002 router_settings: redis_host: redis @@ -28,6 +16,7 @@ router_settings: litellm_settings: set_verbose: True + enable_preview_features: true # service_callback: ["prometheus_system"] # success_callback: ["prometheus"] # failure_callback: ["prometheus"] diff --git a/litellm/router.py b/litellm/router.py index f0d94908e..e0abc2e3b 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2557,23 +2557,25 @@ class Router: # init OpenAI, Azure clients self.set_client(model=deployment.to_json(exclude_none=True)) - # set region (if azure model) - _auto_infer_region = os.environ.get("AUTO_INFER_REGION", False) - if _auto_infer_region == True or _auto_infer_region == "True": + # set region (if azure model) ## PREVIEW FEATURE ## + if litellm.enable_preview_features == True: print("Auto inferring region") # noqa """ Hiding behind a feature flag When there is a large amount of LLM deployments this makes startup times blow up """ try: - if "azure" in deployment.litellm_params.model: + if ( + "azure" in deployment.litellm_params.model + and deployment.litellm_params.region_name is None + ): region = litellm.utils.get_model_region( litellm_params=deployment.litellm_params, mode=None ) 
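                        # persist the inferred region on the deployment, so region-based pre-call checks can reuse it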
deployment.litellm_params.region_name = region except Exception as e: - verbose_router_logger.error( + verbose_router_logger.debug( "Unable to get the region for azure model - {}, {}".format( deployment.litellm_params.model, str(e) ) diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 2d277d749..7c59acb79 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -687,6 +687,55 @@ def test_router_context_window_check_pre_call_check_out_group(): pytest.fail(f"Got unexpected exception on router! - {str(e)}") +@pytest.mark.parametrize("allowed_model_region", ["eu", None]) +def test_router_region_pre_call_check(allowed_model_region): + """ + If region based routing set + - check if only model in allowed region is allowed by '_pre_call_checks' + """ + model_list = [ + { + "model_name": "gpt-3.5-turbo", # openai model name + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + "base_model": "azure/gpt-35-turbo", + "region_name": "eu", + }, + "model_info": {"id": "1"}, + }, + { + "model_name": "gpt-3.5-turbo-large", # openai model name + "litellm_params": { # params for litellm completion/embedding call + "model": "gpt-3.5-turbo-1106", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + "model_info": {"id": "2"}, + }, + ] + + router = Router(model_list=model_list, enable_pre_call_checks=True) + + _healthy_deployments = router._pre_call_checks( + model="gpt-3.5-turbo", + healthy_deployments=model_list, + messages=[{"role": "user", "content": "Hey!"}], + allowed_model_region=allowed_model_region, + ) + + if allowed_model_region is None: + assert len(_healthy_deployments) == 2 + else: + assert len(_healthy_deployments) == 1, "No models selected as healthy" + assert ( + _healthy_deployments[0]["model_info"]["id"] == "1" + ), "Incorrect model id picked. 
Got id={}, expected id=1".format( + _healthy_deployments[0]["model_info"]["id"] + ) + + ### FUNCTION CALLING diff --git a/litellm/utils.py b/litellm/utils.py index 9218f92a3..1c9c3df92 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5881,6 +5881,9 @@ def calculate_max_parallel_requests( def _is_region_eu(model_region: str) -> bool: + if model_region == "eu": + return True + EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"] for region in EU_Regions: if "europe" in model_region.lower(): From 6714854bb77910d11fea987df41fd37facca46a5 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 11 May 2024 11:04:00 -0700 Subject: [PATCH 2/3] feat(router.py): support region routing for bedrock, vertex ai, watsonx --- litellm/llms/azure.py | 8 ++- litellm/llms/bedrock.py | 10 +++ litellm/llms/vertex_ai.py | 17 +++++ litellm/llms/watsonx.py | 9 +++ litellm/router.py | 4 +- litellm/types/router.py | 4 ++ litellm/utils.py | 147 +++++++++++++++++++++++++++++++++++--- 7 files changed, 187 insertions(+), 12 deletions(-) diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py index f416d1437..a56527a59 100644 --- a/litellm/llms/azure.py +++ b/litellm/llms/azure.py @@ -9,7 +9,7 @@ from litellm.utils import ( convert_to_model_response_object, TranscriptionResponse, ) -from typing import Callable, Optional, BinaryIO +from typing import Callable, Optional, BinaryIO, List from litellm import OpenAIConfig import litellm, json import httpx # type: ignore @@ -105,6 +105,12 @@ class AzureOpenAIConfig(OpenAIConfig): optional_params["azure_ad_token"] = value return optional_params + def get_eu_regions(self) -> List[str]: + """ + Source: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-model-availability + """ + return ["europe", "sweden", "switzerland", "france", "uk"] + def select_azure_base_url_or_endpoint(azure_client_params: dict): # azure_client_params = { diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 08433ba18..d2a83703a 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -52,6 +52,16 @@ class AmazonBedrockGlobalConfig: optional_params[mapped_params[param]] = value return optional_params + def get_eu_regions(self) -> List[str]: + """ + Source: https://www.aws-services.info/bedrock.html + """ + return [ + "eu-west-1", + "eu-west-3", + "eu-central-1", + ] + class AmazonTitanConfig: """ diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index a61c07df0..d3bb2c78a 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -198,6 +198,23 @@ class VertexAIConfig: optional_params[mapped_params[param]] = value return optional_params + def get_eu_regions(self) -> List[str]: + """ + Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#available-regions + """ + return [ + "europe-central2", + "europe-north1", + "europe-southwest1", + "europe-west1", + "europe-west2", + "europe-west3", + "europe-west4", + "europe-west6", + "europe-west8", + "europe-west9", + ] + import asyncio diff --git a/litellm/llms/watsonx.py b/litellm/llms/watsonx.py index 99f2d18ba..a12676fa0 100644 --- a/litellm/llms/watsonx.py +++ b/litellm/llms/watsonx.py @@ -149,6 +149,15 @@ class IBMWatsonXAIConfig: optional_params[mapped_params[param]] = value return optional_params + def get_eu_regions(self) -> List[str]: + """ + Source: https://www.ibm.com/docs/en/watsonx/saas?topic=integrations-regional-availability + """ + return [ + "eu-de", + "eu-gb", + ] + def 
convert_messages_to_prompt(model, messages, provider, custom_prompt_dict): # handle anthropic prompts and amazon titan prompts diff --git a/litellm/router.py b/litellm/router.py index e0abc2e3b..0b5846db9 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2329,7 +2329,7 @@ class Router: ) # cache for 1 hr else: - _api_key = api_key + _api_key = api_key # type: ignore if _api_key is not None and isinstance(_api_key, str): # only show first 5 chars of api_key _api_key = _api_key[:8] + "*" * 15 @@ -2953,7 +2953,7 @@ class Router: ): # check if in allowed_model_region if ( - _is_region_eu(model_region=_litellm_params["region_name"]) + _is_region_eu(litellm_params=LiteLLM_Params(**_litellm_params)) == False ): invalid_model_indices.append(idx) diff --git a/litellm/types/router.py b/litellm/types/router.py index dbf36f17c..e8f3ff641 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -132,6 +132,8 @@ class GenericLiteLLMParams(BaseModel): aws_access_key_id: Optional[str] = None aws_secret_access_key: Optional[str] = None aws_region_name: Optional[str] = None + ## IBM WATSONX ## + watsonx_region_name: Optional[str] = None ## CUSTOM PRICING ## input_cost_per_token: Optional[float] = None output_cost_per_token: Optional[float] = None @@ -161,6 +163,8 @@ class GenericLiteLLMParams(BaseModel): aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None, aws_region_name: Optional[str] = None, + ## IBM WATSONX ## + watsonx_region_name: Optional[str] = None, input_cost_per_token: Optional[float] = None, output_cost_per_token: Optional[float] = None, input_cost_per_second: Optional[float] = None, diff --git a/litellm/utils.py b/litellm/utils.py index 1c9c3df92..2704ccbcb 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -107,7 +107,18 @@ try: except Exception as e: verbose_logger.debug(f"Exception import enterprise features {str(e)}") -from typing import cast, List, Dict, Union, Optional, Literal, Any, BinaryIO, Iterable +from typing import ( + cast, + List, + Dict, + Union, + Optional, + Literal, + Any, + BinaryIO, + Iterable, + Tuple, +) from .caching import Cache from concurrent.futures import ThreadPoolExecutor @@ -5880,13 +5891,70 @@ def calculate_max_parallel_requests( return None -def _is_region_eu(model_region: str) -> bool: - if model_region == "eu": +def _get_model_region( + custom_llm_provider: str, litellm_params: LiteLLM_Params +) -> Optional[str]: + """ + Return the region for a model, for a given provider + """ + if custom_llm_provider == "vertex_ai": + # check 'vertex_location' + vertex_ai_location = ( + litellm_params.vertex_location + or litellm.vertex_location + or get_secret("VERTEXAI_LOCATION") + or get_secret("VERTEX_LOCATION") + ) + if vertex_ai_location is not None and isinstance(vertex_ai_location, str): + return vertex_ai_location + elif custom_llm_provider == "bedrock": + aws_region_name = litellm_params.aws_region_name + if aws_region_name is not None: + return aws_region_name + elif custom_llm_provider == "watsonx": + watsonx_region_name = litellm_params.watsonx_region_name + if watsonx_region_name is not None: + return watsonx_region_name + return litellm_params.region_name + + +def _is_region_eu(litellm_params: LiteLLM_Params) -> bool: + """ + Return true/false if a deployment is in the EU + """ + if litellm_params.region_name == "eu": return True - EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"] - for region in EU_Regions: - if "europe" in model_region.lower(): + ## ELSE ## + """ + - get provider + - 
get provider regions + - return true if given region (get_provider_region) in eu region (config.get_eu_regions()) + """ + model, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=litellm_params.model, litellm_params=litellm_params + ) + + model_region = _get_model_region( + custom_llm_provider=custom_llm_provider, litellm_params=litellm_params + ) + + if model_region is None: + return False + + if custom_llm_provider == "azure": + eu_regions = litellm.AzureOpenAIConfig().get_eu_regions() + elif custom_llm_provider == "vertex_ai": + eu_regions = litellm.VertexAIConfig().get_eu_regions() + elif custom_llm_provider == "bedrock": + eu_regions = litellm.AmazonBedrockGlobalConfig().get_eu_regions() + elif custom_llm_provider == "watsonx": + eu_regions = litellm.IBMWatsonXAIConfig().get_eu_regions() + else: + return False + + for region in eu_regions: + if region in model_region.lower(): return True return False @@ -6312,8 +6380,23 @@ def get_llm_provider( custom_llm_provider: Optional[str] = None, api_base: Optional[str] = None, api_key: Optional[str] = None, -): + litellm_params: Optional[LiteLLM_Params] = None, +) -> Tuple[str, str, Optional[str], Optional[str]]: + """ + Returns the provider for a given model name - e.g. 'azure/chatgpt-v-2' -> 'azure' + + For router -> Can also give the whole litellm param dict -> this function will extract the relevant details + """ try: + ## IF LITELLM PARAMS GIVEN ## + if litellm_params is not None: + assert ( + custom_llm_provider is None and api_base is None and api_key is None + ), "Either pass in litellm_params or the custom_llm_provider/api_base/api_key. Otherwise, these values will be overriden." + custom_llm_provider = litellm_params.custom_llm_provider + api_base = litellm_params.api_base + api_key = litellm_params.api_key + dynamic_api_key = None # check if llm provider provided # AZURE AI-Studio Logic - Azure AI Studio supports AZURE/Cohere @@ -6374,7 +6457,8 @@ def get_llm_provider( api_base or get_secret("MISTRAL_AZURE_API_BASE") # for Azure AI Mistral or "https://api.mistral.ai/v1" - ) + ) # type: ignore + # if api_base does not end with /v1 we add it if api_base is not None and not api_base.endswith( "/v1" @@ -6397,10 +6481,30 @@ def get_llm_provider( or get_secret("TOGETHERAI_API_KEY") or get_secret("TOGETHER_AI_TOKEN") ) + if api_base is not None and not isinstance(api_base, str): + raise Exception( + "api base needs to be a string. api_base={}".format(api_base) + ) + if dynamic_api_key is not None and not isinstance(dynamic_api_key, str): + raise Exception( + "dynamic_api_key needs to be a string. dynamic_api_key={}".format( + dynamic_api_key + ) + ) return model, custom_llm_provider, dynamic_api_key, api_base elif model.split("/", 1)[0] in litellm.provider_list: custom_llm_provider = model.split("/", 1)[0] model = model.split("/", 1)[1] + if api_base is not None and not isinstance(api_base, str): + raise Exception( + "api base needs to be a string. api_base={}".format(api_base) + ) + if dynamic_api_key is not None and not isinstance(dynamic_api_key, str): + raise Exception( + "dynamic_api_key needs to be a string. 
dynamic_api_key={}".format(
+                        dynamic_api_key
+                    )
+                )
             return model, custom_llm_provider, dynamic_api_key, api_base
         # check if api base is a known openai compatible endpoint
         if api_base:
@@ -6424,7 +6528,22 @@
                 elif endpoint == "api.deepseek.com/v1":
                     custom_llm_provider = "deepseek"
                     dynamic_api_key = get_secret("DEEPSEEK_API_KEY")
-                return model, custom_llm_provider, dynamic_api_key, api_base
+
+                    if api_base is not None and not isinstance(api_base, str):
+                        raise Exception(
+                            "api base needs to be a string. api_base={}".format(
+                                api_base
+                            )
+                        )
+                    if dynamic_api_key is not None and not isinstance(
+                        dynamic_api_key, str
+                    ):
+                        raise Exception(
+                            "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
+                                dynamic_api_key
+                            )
+                        )
+                    return model, custom_llm_provider, dynamic_api_key, api_base  # type: ignore
 
         # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, etc.)
         ## openai - chatcompletion + text completion
@@ -6515,6 +6634,16 @@
                 ),
                 llm_provider="",
             )
+        if api_base is not None and not isinstance(api_base, str):
+            raise Exception(
+                "api base needs to be a string. api_base={}".format(api_base)
+            )
+        if dynamic_api_key is not None and not isinstance(dynamic_api_key, str):
+            raise Exception(
+                "dynamic_api_key needs to be a string. dynamic_api_key={}".format(
+                    dynamic_api_key
+                )
+            )
         return model, custom_llm_provider, dynamic_api_key, api_base
     except Exception as e:
         if isinstance(e, litellm.exceptions.BadRequestError):

From 0c87bb5adf11d46f6d42da82a17925e694a8ff99 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 11 May 2024 11:34:12 -0700
Subject: [PATCH 3/3] docs(reliability.md): add region based routing to proxy +
 sdk docs

---
 docs/my-website/docs/proxy/reliability.md |  79 +++++++++++--
 docs/my-website/docs/routing.md           | 128 +++++++++++++---------
 2 files changed, 148 insertions(+), 59 deletions(-)

diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md
index bd04216dd..e39a6765f 100644
--- a/docs/my-website/docs/proxy/reliability.md
+++ b/docs/my-website/docs/proxy/reliability.md
@@ -151,7 +151,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```
 
-## Advanced - Context Window Fallbacks
+## Advanced - Context Window Fallbacks (Pre-Call Checks + Fallbacks)
 
 **Before call is made** check if a call is within the model's context window with **`enable_pre_call_checks: true`**.
 
@@ -232,16 +232,16 @@ model_list:
   - model_name: gpt-3.5-turbo-small
     litellm_params:
       model: azure/chatgpt-v-2
-      api_base: os.environ/AZURE_API_BASE
-      api_key: os.environ/AZURE_API_KEY
-      api_version: "2023-07-01-preview"
-    model_info:
-      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+      api_version: "2023-07-01-preview"
+    model_info:
+      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL
 
   - model_name: gpt-3.5-turbo-large
     litellm_params:
-      model: gpt-3.5-turbo-1106
-      api_key: os.environ/OPENAI_API_KEY
+      model: gpt-3.5-turbo-1106
+      api_key: os.environ/OPENAI_API_KEY
 
   - model_name: claude-opus
     litellm_params:
@@ -287,6 +287,69 @@ print(response)
 
+## Advanced - EU-Region Filtering (Pre-Call Checks)
+
+**Before call is made** check if the selected deployment is within an allowed region with **`enable_pre_call_checks: true`**.
+
+Set the 'region_name' of your deployment.
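+
+Region-based filtering for Azure sits behind LiteLLM's preview flag (see the note below). On the proxy, that flag is the `litellm_settings` switch this PR turns on in `_super_secret_config.yaml`; a minimal sketch:
+
+```yaml
+litellm_settings:
+  enable_preview_features: true # 👈 lets the router auto-infer 'region_name' for Azure deployments
+```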
+
+**Note:** LiteLLM can automatically infer the region_name for Vertex AI, Bedrock, and IBM WatsonxAI deployments based on your litellm params. For Azure, set `litellm.enable_preview_features = True`.
+
+**1. Set Config**
+
+```yaml
+router_settings:
+  enable_pre_call_checks: true # 1. Enable pre-call checks
+
+model_list:
+- model_name: gpt-3.5-turbo
+  litellm_params:
+    model: azure/chatgpt-v-2
+    api_base: os.environ/AZURE_API_BASE
+    api_key: os.environ/AZURE_API_KEY
+    api_version: "2023-07-01-preview"
+    region_name: "eu" # 👈 SET EU-REGION
+
+- model_name: gpt-3.5-turbo
+  litellm_params:
+    model: gpt-3.5-turbo-1106
+    api_key: os.environ/OPENAI_API_KEY
+
+- model_name: gemini-pro
+  litellm_params:
+    model: vertex_ai/gemini-pro-1.5
+    vertex_project: adroit-crow-1234
+    vertex_location: us-east1 # 👈 AUTOMATICALLY INFERS 'region_name'
+```
+
+**2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+**3. Test it!**
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",
+    base_url="http://0.0.0.0:4000"
+)
+
+# request sent to model set on litellm proxy, `litellm --model`
+response = client.chat.completions.with_raw_response.create(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Who was Alexander?"}]
+)
+
+print(response)
+
+print(response.headers.get('x-litellm-model-api-base'))
+```
+
 ## Advanced - Custom Timeouts, Stream Timeouts - Per Model
 For each model you can set `timeout` & `stream_timeout` under `litellm_params`
 ```yaml
diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 0b0c7713c..b1afad2fb 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -879,13 +879,11 @@ router = Router(model_list: Optional[list] = None, 
 cache_responses=True)
 ```
 
-## Pre-Call Checks (Context Window)
+## Pre-Call Checks (Context Window, EU-Regions)
 
 Enable pre-call checks to filter out:
 1. deployments with context window limit < messages for a call.
-2. deployments that have exceeded rate limits when making concurrent calls. (eg. `asyncio.gather(*[
-   router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages
-   ])`)
+2. deployments outside of the eu-region
 
 
 
@@ -900,10 +898,14 @@ router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set t
 
 **2. Set Model List**
 
-For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.
+For context window checks on azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.
-
-
+For 'eu-region' filtering, set the 'region_name' of the deployment.
+
+**Note:** We automatically infer the region_name for Vertex AI, Bedrock, and IBM WatsonxAI deployments based on your litellm params. For Azure, set `litellm.enable_preview_features = True`.
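+In the SDK, the same preview flag is just a module-level attribute, so enabling Azure region inference is one line before you build the router; a minimal sketch (assumes the `model_list` defined below):
+
+```python
+import litellm
+from litellm import Router
+
+litellm.enable_preview_features = True # 👈 preview: auto-infer 'region_name' for Azure deployments
+
+router = Router(model_list=model_list, enable_pre_call_checks=True)
+```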
+
+
+[**See Code**](https://github.com/BerriAI/litellm/blob/d33e49411d6503cb634f9652873160cd534dec96/litellm/router.py#L2958)
 
 ```python
 model_list = [
     {
         "model_name": "gpt-3.5-turbo", # model group name
         "litellm_params": { # params for litellm completion/embedding call
             "model": "azure/chatgpt-v-2",
             "api_key": os.getenv("AZURE_API_KEY"),
             "api_version": os.getenv("AZURE_API_VERSION"),
             "api_base": os.getenv("AZURE_API_BASE"),
-        },
-        "model_info": {
+            "region_name": "eu", # 👈 SET 'EU' REGION NAME
             "base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL
-        }
+        },
     },
     {
         "model_name": "gpt-3.5-turbo", # model group name
         "litellm_params": { # params for litellm completion/embedding call
             "model": "gpt-3.5-turbo-1106",
             "api_key": os.getenv("OPENAI_API_KEY"),
         },
     },
+    {
+        "model_name": "gemini-pro",
+        "litellm_params": {
+            "model": "vertex_ai/gemini-pro-1.5",
+            "vertex_project": "adroit-crow-1234",
+            "vertex_location": "us-east1" # 👈 AUTOMATICALLY INFERS 'region_name'
+        }
+    }
 ]
 
 router = Router(model_list=model_list, enable_pre_call_checks=True)
 ```
 
-
-
-
-
-```python
-model_list = [
-    {
-        "model_name": "gpt-3.5-turbo-small", # model group name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "azure/chatgpt-v-2",
-            "api_key": os.getenv("AZURE_API_KEY"),
-            "api_version": os.getenv("AZURE_API_VERSION"),
-            "api_base": os.getenv("AZURE_API_BASE"),
-        },
-        "model_info": {
-            "base_model": "azure/gpt-35-turbo", # 👈 (Azure-only) SET BASE MODEL
-        }
-    },
-    {
-        "model_name": "gpt-3.5-turbo-large", # model group name
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "gpt-3.5-turbo-1106",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-        },
-    },
-    {
-        "model_name": "claude-opus",
-        "litellm_params": { # params for litellm completion/embedding call
-            "model": "claude-3-opus-20240229",
-            "api_key": os.getenv("ANTHROPIC_API_KEY"),
-        },
-    },
-]
-
-router = Router(model_list=model_list, enable_pre_call_checks=True, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}])
-```
-
-
-
 **3. Test it!**
 
+
+
+
+
 ```python
 """
 - Give a gpt-3.5-turbo model group with different context windows (4k vs. 
16k) @@ -983,7 +956,6 @@ router = Router(model_list=model_list, enable_pre_call_checks=True, context_wind from litellm import Router import os -try: model_list = [ { "model_name": "gpt-3.5-turbo", # model group name @@ -992,6 +964,7 @@ model_list = [ "api_key": os.getenv("AZURE_API_KEY"), "api_version": os.getenv("AZURE_API_VERSION"), "api_base": os.getenv("AZURE_API_BASE"), + "base_model": "azure/gpt-35-turbo", }, "model_info": { "base_model": "azure/gpt-35-turbo", @@ -1021,6 +994,59 @@ response = router.completion( print(f"response: {response}") ``` + + +```python +""" +- Give 2 gpt-3.5-turbo deployments, in eu + non-eu regions +- Make a call +- Assert it picks the eu-region model +""" + +from litellm import Router +import os + +model_list = [ + { + "model_name": "gpt-3.5-turbo", # model group name + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + "region_name": "eu" + }, + "model_info": { + "id": "1" + } + }, + { + "model_name": "gpt-3.5-turbo", # model group name + "litellm_params": { # params for litellm completion/embedding call + "model": "gpt-3.5-turbo-1106", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + "model_info": { + "id": "2" + } + }, +] + +router = Router(model_list=model_list, enable_pre_call_checks=True) + +response = router.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Who was Alexander?"}], +) + +print(f"response: {response}") + +print(f"response id: {response._hidden_params['model_id']}") +``` + + + + :::info