diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 9735b539e..0022794c8 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -551,6 +551,95 @@ router = Router(model_list: Optional[list] = None,
              cache_responses=True)
 ```
 
+## Pre-Call Checks (Context Window)
+
+Enable pre-call checks to filter out deployments whose context window is smaller than the messages for a given call.
+
+**1. Enable pre-call checks**
+```python
+from litellm import Router
+# ...
+router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
+```
+
+**2. (Azure-only) Set base model**
+
+For Azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json); all the Azure models start with `azure/`.
+
+```python
+import os
+
+model_list = [
+    {
+        "model_name": "gpt-3.5-turbo", # model group name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "azure/chatgpt-v-2",
+            "api_key": os.getenv("AZURE_API_KEY"),
+            "api_version": os.getenv("AZURE_API_VERSION"),
+            "api_base": os.getenv("AZURE_API_BASE"),
+        },
+        "model_info": {
+            "base_model": "azure/gpt-35-turbo", # 👈 SET BASE MODEL
+        }
+    },
+    {
+        "model_name": "gpt-3.5-turbo", # model group name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "gpt-3.5-turbo-1106",
+            "api_key": os.getenv("OPENAI_API_KEY"),
+        },
+    },
+]
+```
+
+**3. Test it!**
+
+```python
+"""
+- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
+- Send a 5k prompt
+- Assert it works
+"""
+from litellm import Router
+import os
+
+model_list = [
+    {
+        "model_name": "gpt-3.5-turbo", # model group name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "azure/chatgpt-v-2",
+            "api_key": os.getenv("AZURE_API_KEY"),
+            "api_version": os.getenv("AZURE_API_VERSION"),
+            "api_base": os.getenv("AZURE_API_BASE"),
+        },
+        "model_info": {
+            "base_model": "azure/gpt-35-turbo",
+        }
+    },
+    {
+        "model_name": "gpt-3.5-turbo", # model group name
+        "litellm_params": { # params for litellm completion/embedding call
+            "model": "gpt-3.5-turbo-1106",
+            "api_key": os.getenv("OPENAI_API_KEY"),
+        },
+    },
+]
+
+router = Router(model_list=model_list, enable_pre_call_checks=True)
+
+text = "What is the meaning of 42?" * 5000
+
+response = router.completion(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "system", "content": text},
+        {"role": "user", "content": "Who was Alexander?"},
+    ],
+)
+
+print(f"response: {response}")
+```
+
 ## Caching across model groups
 
 If you want to cache across 2 different model groups (e.g. azure deployments, and openai), use caching groups.
diff --git a/litellm/router.py b/litellm/router.py
index 56a4894bf..b39b67a09 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2181,9 +2181,11 @@ class Router:
         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
            try:
-                base_model = deployment.get("litellm_params", {}).get(
-                    "base_model", None
-                )
+                base_model = deployment.get("model_info", {}).get("base_model", None)
+                if base_model is None:
+                    base_model = deployment.get("litellm_params", {}).get(
+                        "base_model", None
+                    )
                 model = base_model or deployment.get("litellm_params", {}).get(
                     "model", None
                 )
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 40fa52b32..82580236a 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -301,7 +301,7 @@ def test_router_azure_acompletion():
 def test_router_context_window_check():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
-    - Send a 10k prompt
+    - Send a 5k prompt
     - Assert it works
     """
     from large_text import text
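
Note on the `router.py` hunk: the pre-call check now resolves `base_model` from a deployment's `model_info` first and only falls back to `litellm_params`, which is why the docs above tell Azure users to set `base_model` under `model_info`. Below is a minimal standalone sketch of that resolution order; `resolve_model_name` is a hypothetical helper written for illustration only, not part of litellm's API.

```python
from typing import Optional


def resolve_model_name(deployment: dict) -> Optional[str]:
    """Sketch of the lookup order used by the pre-call check (illustrative only)."""
    # 1. Prefer an explicitly configured base model under "model_info".
    base_model = deployment.get("model_info", {}).get("base_model", None)
    # 2. Fall back to a "base_model" set inside "litellm_params" (previous behavior).
    if base_model is None:
        base_model = deployment.get("litellm_params", {}).get("base_model", None)
    # 3. Otherwise use the deployment's own "model" name.
    return base_model or deployment.get("litellm_params", {}).get("model", None)


if __name__ == "__main__":
    deployment = {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "azure/chatgpt-v-2"},
        "model_info": {"base_model": "azure/gpt-35-turbo"},
    }
    # Prints "azure/gpt-35-turbo", so the context window of the base model
    # is what gets compared against the prompt size.
    print(resolve_model_name(deployment))
```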