From b20f4f65b4008de1b4acc45cc32bca59267dd408 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 17 May 2024 17:16:36 -0700
Subject: [PATCH] fix(bedrock_httpx.py): raise better timeout exception

---
 litellm/llms/bedrock_httpx.py | 15 ++++++++--
 litellm/tests/log.txt         | 40 +++----------------------
 litellm/utils.py              | 55 +++++++++++++++++++++++++++++++----
 3 files changed, 65 insertions(+), 45 deletions(-)

diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 5d22c5ecbc..5fe0e0cc17 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -859,13 +859,14 @@ class BedrockLLM(BaseLLM):
             )
             return streaming_response
 
-        response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
-
         try:
+            response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
             raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
@@ -909,7 +910,15 @@ class BedrockLLM(BaseLLM):
         else:
             self.client = client  # type: ignore
 
-        response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+        try:
+            response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
+
         return self.process_response(
             model=model,
             response=response,
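Both hunks above apply the same fix: the httpx POST moves inside the try block so that client-side timeouts are caught alongside HTTP status errors, and httpx.TimeoutException is translated into a BedrockError carrying a synthetic 408 status code. A minimal sketch of this pattern outside litellm, with a hypothetical ProviderError standing in for BedrockError:

    import httpx

    class ProviderError(Exception):
        """Stand-in for BedrockError; carries the mapped status code."""
        def __init__(self, status_code: int, message: str):
            self.status_code = status_code
            self.message = message
            super().__init__(message)

    def post_with_mapped_errors(client: httpx.Client, url: str, body: str) -> httpx.Response:
        try:
            response = client.post(url, content=body)
            response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        except httpx.HTTPStatusError as err:
            # A response exists here, so its status code and body can be surfaced.
            raise ProviderError(err.response.status_code, err.response.text)
        except httpx.TimeoutException:
            # No response object exists on a timeout; map it to a synthetic 408.
            raise ProviderError(408, "Timeout error occurred.")
        return response

Note that the patch itself still passes data= to client.post, which is what triggers the httpx DeprecationWarning visible in the log.txt diff below; content= is the non-deprecated spelling for raw bytes/text.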
diff --git a/litellm/tests/log.txt b/litellm/tests/log.txt
index c82f142963..fd9557c9b5 100644
--- a/litellm/tests/log.txt
+++ b/litellm/tests/log.txt
@@ -3,40 +3,9 @@ platform darwin -- Python 3.11.9, pytest-7.3.1, pluggy-1.3.0
 rootdir: /Users/krrishdholakia/Documents/litellm/litellm/tests
 plugins: timeout-2.2.0, asyncio-0.23.2, anyio-3.7.1, xdist-3.3.1
 asyncio: mode=Mode.STRICT
-collected 2 items
-
-test_streaming.py .Logging Details LiteLLM-Async Success Call
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-.Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Async success callbacks: Got a complete streaming response
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
- [100%]Logging Details LiteLLM-Success Call: None
-success callbacks: []
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Logging Details LiteLLM-Success Call streaming complete
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
+collected 1 item
+
+test_router_timeout.py .                                                 [100%]
 
 =============================== warnings summary ===============================
 ../../../../../../opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings
@@ -99,10 +68,9 @@ final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens
   /Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
     with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
 
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-False]
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-True]
+test_router_timeout.py::test_router_timeouts_bedrock
   /opt/homebrew/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
     warnings.warn(message, DeprecationWarning)
 
 -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
-======================== 2 passed, 41 warnings in 4.94s ========================
+======================== 1 passed, 40 warnings in 0.99s ========================
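The utils.py changes below do two things: the chain of independent if checks in the Bedrock branch of exception_type becomes a mutually exclusive elif chain, and new branches map HTTP status codes 408, 422, 429, 503, and 504 to litellm exceptions. Paraphrased as a lookup table (this dict is illustrative only, not code from the patch; the exception names are litellm's):

    # Status code -> litellm exception raised for Bedrock after this patch.
    BEDROCK_STATUS_TO_EXCEPTION = {
        408: "Timeout",                  # request timeout, raised by bedrock_httpx.py above
        422: "BadRequestError",          # malformed / unprocessable request
        429: "RateLimitError",           # throttled
        503: "ServiceUnavailableError",  # service unavailable
        504: "Timeout",                  # gateway timeout
    }

The 408 entry is what ties the two files together: bedrock_httpx.py raises BedrockError(status_code=408) on httpx.TimeoutException, and exception_type now converts that into litellm's Timeout. Note that the Timeout branches pass no response= argument, since no HTTP response exists when a timeout fires.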
diff --git a/litellm/utils.py b/litellm/utils.py
index facabd3aa9..800a9cdab6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8674,7 +8674,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "Malformed input request" in error_str:
+            elif "Malformed input request" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
                     message=f"BedrockException - {error_str}",
@@ -8682,7 +8682,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Unable to locate credentials" in error_str
                 or "The security token included in the request is invalid"
                 in error_str
@@ -8694,7 +8694,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "AccessDeniedException" in error_str:
+            elif "AccessDeniedException" in error_str:
                 exception_mapping_worked = True
                 raise PermissionDeniedError(
                     message=f"BedrockException PermissionDeniedError - {error_str}",
@@ -8702,7 +8702,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "throttlingException" in error_str
                 or "ThrottlingException" in error_str
             ):
@@ -8713,7 +8713,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Connect timeout on endpoint URL" in error_str
                 or "timed out" in error_str
             ):
@@ -8723,7 +8723,7 @@
                     model=model,
                     llm_provider="bedrock",
                 )
-            if hasattr(original_exception, "status_code"):
+            elif hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
@@ -8761,6 +8761,49 @@
                         model=model,
                         response=original_exception.response,
                     )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise BadRequestError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 503:
+                    exception_mapping_worked = True
+                    raise ServiceUnavailableError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 504:  # gateway timeout error
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
         elif custom_llm_provider == "sagemaker":
             if "Unable to locate credentials" in error_str:
                 exception_mapping_worked = True