From 52b1538b2ead219b8c4cae993a36e01290f0ffe3 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Mon, 1 Apr 2024 10:51:54 -0700
Subject: [PATCH] fix(router.py): support context window fallbacks for pre-call
 checks

---
 litellm/router.py            | 12 +++++++++
 litellm/tests/test_router.py | 50 +++++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/litellm/router.py b/litellm/router.py
index b852d7195..18aa83369 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2210,6 +2210,18 @@ class Router:
                 ):
                     invalid_model_indices.append(idx)
 
+        if len(invalid_model_indices) == len(_returned_deployments):
+            """
+            - no deployments available because all of them were filtered out by the context window checks
+            """
+            raise litellm.ContextWindowExceededError(
+                message="Context Window exceeded for given call",
+                model=model,
+                llm_provider="",
+                response=httpx.Response(
+                    status_code=400, request=httpx.Request("GET", "https://example.com")
+                ),
+            )
         if len(invalid_model_indices) > 0:
             for idx in reversed(invalid_model_indices):
                 _returned_deployments.pop(idx)
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 16a3c2475..dfcaf9f85 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -397,7 +397,7 @@ async def test_async_router_context_window_fallback():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
-def test_router_context_window_check():
+def test_router_context_window_check_pre_call_check_in_group():
     """
     - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
     - Send a 5k prompt
@@ -445,6 +445,54 @@ def test_router_context_window_check():
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
 
 
+def test_router_context_window_check_pre_call_check_out_group():
+    """
+    - Give 2 separate gpt-3.5-turbo model groups with different context windows (4k vs. 16k)
+    - Send a 5k prompt to the small group, with the large group as its context window fallback
+    - Assert the call completes without raising
+    """
+    from large_text import text  # prompt text sent as the system message below
+    import os
+
+    litellm.set_verbose = False
+
+    print(f"len(text): {len(text)}")
+    try:
+        model_list = [
+            {
+                "model_name": "gpt-3.5-turbo-small",  # model group requested in the call below
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                    "base_model": "azure/gpt-35-turbo",  # presumably used to look up the context window for the pre-call check - TODO confirm
+                },
+            },
+            {
+                "model_name": "gpt-3.5-turbo-large",  # model group listed as the context window fallback
+                "litellm_params": {  # params for litellm completion/embedding call
+                    "model": "gpt-3.5-turbo-1106",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            },
+        ]
+
+        router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large"]}])  # type: ignore
+
+        response = router.completion(
+            model="gpt-3.5-turbo-small",  # call the small group; its fallback mapping is set on the router above
+            messages=[
+                {"role": "system", "content": text},
+                {"role": "user", "content": "Who was Alexander?"},
+            ],
+        )
+
+        print(f"response: {response}")
+    except Exception as e:  # any exception raised inside the try block fails the test
+        pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
 ### FUNCTION CALLING