diff --git a/litellm/main.py b/litellm/main.py
index 5f2b34482..817dc5510 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -572,6 +572,7 @@ def completion(
         "ttl",
         "cache",
         "no-log",
+        "base_model",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
diff --git a/litellm/router.py b/litellm/router.py
index e863bfbc8..56a4894bf 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2175,13 +2175,19 @@ class Router:
         try:
             input_tokens = litellm.token_counter(messages=messages)
-        except:
+        except Exception as e:
             return _returned_deployments

         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
             try:
-                model_info = litellm.get_model_info(model=deployment["model_name"])
+                base_model = deployment.get("litellm_params", {}).get(
+                    "base_model", None
+                )
+                model = base_model or deployment.get("litellm_params", {}).get(
+                    "model", None
+                )
+                model_info = litellm.get_model_info(model=model)
             except:
                 continue
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index a85a770d4..40fa52b32 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -319,6 +319,7 @@ def test_router_context_window_check():
                 "api_key": os.getenv("AZURE_API_KEY"),
                 "api_version": os.getenv("AZURE_API_VERSION"),
                 "api_base": os.getenv("AZURE_API_BASE"),
+                "base_model": "azure/gpt-35-turbo",
             },
         },
         {
@@ -330,7 +331,7 @@ def test_router_context_window_check():
         },
     ]

-    router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True)  # type: ignore
+    router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0)  # type: ignore

     response = router.completion(
         model="gpt-3.5-turbo",
...
         )
         print(f"response: {response}")
-        raise Exception("it worked!")
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
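
For context: this change lets a deployment's `litellm_params` carry an optional `base_model` hint, which the router's context-window pre-call check now prefers over the raw `model` value when calling `litellm.get_model_info()`. That matters for Azure, where `model` is a deployment alias that litellm's model map may not recognize. A minimal usage sketch based on the test above; the Azure deployment alias and the prompt content are placeholders, not values from this PR:

```python
import os
from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name callers use
        "litellm_params": {
            "model": "azure/my-deployment",  # placeholder Azure deployment alias
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
            # base_model tells get_model_info() which known model's
            # context window applies to this deployment
            "base_model": "azure/gpt-35-turbo",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo-1106",  # larger context window
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

# enable_pre_call_checks=True activates the filtering patched in router.py:
# deployments whose resolved context window is smaller than the prompt's
# token count are skipped, so a long prompt routes to the larger deployment.
router = Router(model_list=model_list, enable_pre_call_checks=True)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "long prompt goes here"}],
)
```

Note that without `base_model`, `get_model_info()` would raise for the unrecognized Azure alias and the `except: continue` path would drop the deployment from consideration entirely; the hint restores it to the candidate pool with an accurate limit.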