fix(bedrock_httpx.py): raise better timeout exception

Krrish Dholakia 2024-05-17 17:16:36 -07:00
parent b486bb1f8b
commit 4682802444
3 changed files with 65 additions and 45 deletions

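What this buys callers: a Bedrock request that hits its timeout now surfaces as a
typed timeout error rather than a generic API error. A minimal sketch of the
resulting behavior, assuming litellm's top-level completion API and exception
exports (not part of this diff):

    import litellm

    try:
        litellm.completion(
            model="bedrock/cohere.command-text-v14",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            timeout=0.01,  # deliberately tiny to force a client-side timeout
        )
    except litellm.Timeout:
        # with this commit: httpx.TimeoutException -> BedrockError(408) -> Timeout
        print("Bedrock request timed out")
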
litellm/llms/bedrock_httpx.py

@@ -859,13 +859,14 @@ class BedrockLLM(BaseLLM):
             )
             return streaming_response
 
-        response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
         try:
+            response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
             raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
@@ -909,7 +910,15 @@ class BedrockLLM(BaseLLM):
         else:
             self.client = client  # type: ignore
 
-        response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+        try:
+            response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
             response=response,
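
Both the sync and async paths now wrap the POST itself, so client-side timeouts are
converted into a BedrockError with status code 408 instead of escaping as raw httpx
errors. One detail worth noting (an observation about httpx, not part of the diff):
every httpx timeout variant derives from httpx.TimeoutException, so this single
except clause covers connect, read, write, and pool timeouts alike.

    import httpx

    # All four httpx timeout classes share the TimeoutException base caught above.
    for cls in (httpx.ConnectTimeout, httpx.ReadTimeout,
                httpx.WriteTimeout, httpx.PoolTimeout):
        assert issubclass(cls, httpx.TimeoutException)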

View file

@@ -3,40 +3,9 @@ platform darwin -- Python 3.11.9, pytest-7.3.1, pluggy-1.3.0
 rootdir: /Users/krrishdholakia/Documents/litellm/litellm/tests
 plugins: timeout-2.2.0, asyncio-0.23.2, anyio-3.7.1, xdist-3.3.1
 asyncio: mode=Mode.STRICT
-collected 2 items
+collected 1 item
 
-test_streaming.py .Logging Details LiteLLM-Async Success Call
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-.Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Async success callbacks: Got a complete streaming response
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-[100%]Logging Details LiteLLM-Success Call: None
-success callbacks: []
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Logging Details LiteLLM-Success Call streaming complete
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
+test_router_timeout.py . [100%]
 
 =============================== warnings summary ===============================
 ../../../../../../opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings
@@ -99,10 +68,9 @@ final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens
 /Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
   with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
 
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-False]
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-True]
+test_router_timeout.py::test_router_timeouts_bedrock
 /opt/homebrew/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
   warnings.warn(message, DeprecationWarning)
 
 -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
-======================== 2 passed, 41 warnings in 4.94s ========================
+======================== 1 passed, 40 warnings in 0.99s ========================
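
The committed test log now reflects test_router_timeout.py::test_router_timeouts_bedrock
instead of the streaming tests. The test body is not part of this diff; a hypothetical
sketch of the pattern such a test typically follows (the Router config and assertions
below are assumptions, not the actual test):

    import pytest
    import litellm
    from litellm import Router

    @pytest.mark.asyncio
    async def test_router_timeouts_bedrock():
        router = Router(
            model_list=[
                {
                    "model_name": "bedrock-cohere",
                    "litellm_params": {
                        "model": "bedrock/cohere.command-text-v14",
                        "timeout": 0.001,  # force a timeout
                    },
                }
            ]
        )
        with pytest.raises(litellm.Timeout):
            await router.acompletion(
                model="bedrock-cohere",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
            )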

litellm/utils.py

@@ -8674,7 +8674,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "Malformed input request" in error_str:
+            elif "Malformed input request" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
                     message=f"BedrockException - {error_str}",
@@ -8682,7 +8682,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Unable to locate credentials" in error_str
                 or "The security token included in the request is invalid"
                 in error_str
@@ -8694,7 +8694,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "AccessDeniedException" in error_str:
+            elif "AccessDeniedException" in error_str:
                 exception_mapping_worked = True
                 raise PermissionDeniedError(
                     message=f"BedrockException PermissionDeniedError - {error_str}",
@@ -8702,7 +8702,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "throttlingException" in error_str
                 or "ThrottlingException" in error_str
             ):
@@ -8713,7 +8713,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Connect timeout on endpoint URL" in error_str
                 or "timed out" in error_str
             ):
@@ -8723,7 +8723,7 @@ def exception_type(
                     model=model,
                     llm_provider="bedrock",
                 )
-            if hasattr(original_exception, "status_code"):
+            elif hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
@@ -8761,6 +8761,49 @@ def exception_type(
                         model=model,
                         response=original_exception.response,
                     )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise BadRequestError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 503:
+                    exception_mapping_worked = True
+                    raise ServiceUnavailableError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 504:  # gateway timeout error
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
         elif custom_llm_provider == "sagemaker":
             if "Unable to locate credentials" in error_str:
                 exception_mapping_worked = True
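
Taken together, the new fallback gives the bedrock branch an explicit status-code
mapping once no error-string pattern has matched. A compact summary of what the
added block raises (an illustrative restatement of the diff above, assuming these
exception classes are importable from the litellm package):

    from litellm import (
        BadRequestError,
        RateLimitError,
        ServiceUnavailableError,
        Timeout,
    )

    # status code -> exception raised by the new bedrock fallback
    BEDROCK_STATUS_TO_EXCEPTION = {
        408: Timeout,                  # request timeout (raised by bedrock_httpx.py)
        422: BadRequestError,          # unprocessable / malformed input
        429: RateLimitError,           # throttling
        503: ServiceUnavailableError,  # service unavailable
        504: Timeout,                  # gateway timeout
    }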