diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 998e3eb9b9..8514996f4e 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -35,7 +35,10 @@ litellm.num_retries = 0
 
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
 
-models = ["command-nightly"]
+models = [
+    "sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
+    "bedrock/anthropic.claude-instant-v1",
+]
 
 
 # Test 1: Context Window Errors
@@ -45,7 +48,8 @@ def test_context_window(model):
     sample_text = "Say error 50 times" * 1000000
     messages = [{"content": sample_text, "role": "user"}]
     try:
-        litellm.set_verbose = True
+        litellm.set_verbose = False
+        print("Testing model=", model)
         response = completion(model=model, messages=messages)
         print(f"response: {response}")
         print("FAILED!")
diff --git a/litellm/utils.py b/litellm/utils.py
index 659bc7e794..6029493c65 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5895,6 +5895,7 @@ def exception_type(
                 "too many tokens" in error_str
                 or "expected maxLength:" in error_str
                 or "Input is too long" in error_str
+                or "prompt: length: 1.." in error_str
                 or "Too many input tokens" in error_str
             ):
                 exception_mapping_worked = True
@@ -5988,6 +5989,17 @@ def exception_type(
                     llm_provider="sagemaker",
                     response=original_exception.response,
                 )
+            elif (
+                "`inputs` tokens + `max_new_tokens` must be <=" in error_str
+                or "instance type with more CPU capacity or memory" in error_str
+            ):
+                exception_mapping_worked = True
+                raise ContextWindowExceededError(
+                    message=f"SagemakerException - {error_str}",
+                    model=model,
+                    llm_provider="sagemaker",
+                    response=original_exception.response,
+                )
         elif custom_llm_provider == "vertex_ai":
             if (
                 "Vertex AI API has not been used in project" in error_str
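
Not part of the diff: a minimal sketch of how the new SageMaker mapping can be exercised end-to-end, mirroring the updated test above. It assumes AWS credentials for the listed SageMaker endpoint are configured in the environment; the endpoint name is taken directly from the test's model list.

    # Sketch only: verifies that SageMaker context-window errors now surface
    # as litellm.ContextWindowExceededError instead of a generic exception.
    import litellm
    from litellm import completion, ContextWindowExceededError

    litellm.num_retries = 0

    # Deliberately oversized prompt, same pattern as test_context_window.
    oversized = "Say error 50 times" * 1000000
    messages = [{"content": oversized, "role": "user"}]

    try:
        completion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=messages,
        )
        print("FAILED! expected ContextWindowExceededError")
    except ContextWindowExceededError as e:
        # The strings added in utils.py (e.g. "`inputs` tokens +
        # `max_new_tokens` must be <=") route the raw provider error here.
        print("mapped correctly:", e)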