diff --git a/litellm/main.py b/litellm/main.py
index 5f2b34482..817dc5510 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -572,6 +572,7 @@ def completion(
         "ttl",
         "cache",
         "no-log",
+        "base_model",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
diff --git a/litellm/router.py b/litellm/router.py
index e863bfbc8..56a4894bf 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2175,13 +2175,19 @@ class Router:
         try:
             input_tokens = litellm.token_counter(messages=messages)
-        except:
+        except Exception as e:
             return _returned_deployments

         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
             try:
-                model_info = litellm.get_model_info(model=deployment["model_name"])
+                base_model = deployment.get("litellm_params", {}).get(
+                    "base_model", None
+                )
+                model = base_model or deployment.get("litellm_params", {}).get(
+                    "model", None
+                )
+                model_info = litellm.get_model_info(model=model)
             except:
                 continue
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index a85a770d4..40fa52b32 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -319,6 +319,7 @@ def test_router_context_window_check():
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
+                "base_model": "azure/gpt-35-turbo",
             },
         },
         {
@@ -330,7 +331,7 @@ def test_router_context_window_check():
         },
     ]

-    router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True)  # type: ignore
+    router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore

     response = router.completion(
         model="gpt-3.5-turbo",
...
         )
         print(f"response: {response}")
-        raise Exception("it worked!")
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
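
For context: this change lets a deployment's `litellm_params` carry an optional `base_model` hint, which the router's context-window pre-call check now prefers over the raw `model` value when calling `litellm.get_model_info()`. That matters for Azure, where `model` is a deployment alias that litellm's model map may not recognize. A minimal usage sketch based on the test above; the Azure deployment alias and the prompt content are placeholders, not values from this PR:

```python
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name callers use
        "litellm_params": {
            "model": "azure/my-deployment",  # placeholder Azure deployment alias
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
            # base_model tells get_model_info() which known model's
            # context window applies to this deployment
            "base_model": "azure/gpt-35-turbo",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo-1106",  # larger context window
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

# enable_pre_call_checks=True activates the filtering patched in router.py:
# deployments whose resolved context window is smaller than the prompt's
# token count are skipped, so a long prompt routes to the larger deployment.
router = Router(model_list=model_list, enable_pre_call_checks=True)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "long prompt goes here"}],
)
```

Note that without `base_model`, `get_model_info()` would raise for the unrecognized Azure alias and the `except: continue` path would drop the deployment from consideration entirely; the hint restores it to the candidate pool with an accurate limit.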