From 52b1538b2ead219b8c4cae993a36e01290f0ffe3 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 1 Apr 2024 10:51:54 -0700 Subject: [PATCH] fix(router.py): support context window fallbacks for pre-call checks --- litellm/router.py | 12 +++++++++ litellm/tests/test_router.py | 50 +++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index b852d7195..18aa83369 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2210,6 +2210,18 @@ class Router: ): invalid_model_indices.append(idx) + if len(invalid_model_indices) == len(_returned_deployments): + """ + - no healthy deployments available because every deployment failed the context window check + """ + raise litellm.ContextWindowExceededError( + message="Context Window exceeded for given call", + model=model, + llm_provider="", + response=httpx.Response( + status_code=400, request=httpx.Request("GET", "https://example.com") + ), + ) if len(invalid_model_indices) > 0: for idx in reversed(invalid_model_indices): _returned_deployments.pop(idx) diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 16a3c2475..dfcaf9f85 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -397,7 +397,7 @@ async def test_async_router_context_window_fallback(): pytest.fail(f"Got unexpected exception on router! - {str(e)}") -def test_router_context_window_check(): +def test_router_context_window_check_pre_call_check_in_group(): """ - Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k) - Send a 5k prompt @@ -445,6 +445,54 @@ def test_router_context_window_check(): pytest.fail(f"Got unexpected exception on router! - {str(e)}") +def test_router_context_window_check_pre_call_check_out_group(): + """ + - Give 2 gpt-3.5-turbo model groups with different context windows (4k vs. 
16k) + - Send a 5k prompt + - Assert the call completes without raising (expected to succeed via the context window fallback to the large group) + """ + from large_text import text + import os + + litellm.set_verbose = False + + print(f"len(text): {len(text)}") + try: + model_list = [ + { + "model_name": "gpt-3.5-turbo-small", # model group name used by the router + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + "base_model": "azure/gpt-35-turbo", + }, + }, + { + "model_name": "gpt-3.5-turbo-large", # model group name used by the router + "litellm_params": { # params for litellm completion/embedding call + "model": "gpt-3.5-turbo-1106", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + }, + ] + + router = Router(model_list=model_list, set_verbose=True, enable_pre_call_checks=True, num_retries=0, context_window_fallbacks=[{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large"]}]) # type: ignore + + response = router.completion( + model="gpt-3.5-turbo-small", + messages=[ + {"role": "system", "content": text}, + {"role": "user", "content": "Who was Alexander?"}, + ], + ) + + print(f"response: {response}") + except Exception as e: + pytest.fail(f"Got unexpected exception on router! - {str(e)}") + + ### FUNCTION CALLING