From b20f4f65b4008de1b4acc45cc32bca59267dd408 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 17 May 2024 17:16:36 -0700
Subject: [PATCH] fix(bedrock_httpx.py): raise better timeout exception

---
 litellm/llms/bedrock_httpx.py | 15 ++++++++--
 litellm/tests/log.txt         | 40 +++----------------------
 litellm/utils.py              | 55 +++++++++++++++++++++++++++++++----
 3 files changed, 65 insertions(+), 45 deletions(-)

diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 5d22c5ecbc..5fe0e0cc17 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -859,13 +859,14 @@ class BedrockLLM(BaseLLM):
             )
             return streaming_response
 
-        response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
-
         try:
+            response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
             raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
@@ -909,7 +910,15 @@ class BedrockLLM(BaseLLM):
         else:
             self.client = client  # type: ignore
 
-        response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+        try:
+            response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
+
         return self.process_response(
             model=model,
             response=response,
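Both hunks above apply the same fix: the httpx POST moves inside the try block so that client-side timeouts are caught alongside HTTP status errors, and httpx.TimeoutException is translated into a BedrockError carrying a synthetic 408 status code. A minimal sketch of this pattern outside litellm, with a hypothetical ProviderError standing in for BedrockError:

    import httpx

    class ProviderError(Exception):
        """Stand-in for BedrockError; carries the mapped status code."""
        def __init__(self, status_code: int, message: str):
            self.status_code = status_code
            self.message = message
            super().__init__(message)

    def post_with_mapped_errors(client: httpx.Client, url: str, body: str) -> httpx.Response:
        try:
            response = client.post(url, content=body)
            response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        except httpx.HTTPStatusError as err:
            # A response exists here, so its status code and body can be surfaced.
            raise ProviderError(err.response.status_code, err.response.text)
        except httpx.TimeoutException:
            # No response object exists on a timeout; map it to a synthetic 408.
            raise ProviderError(408, "Timeout error occurred.")
        return response

Note that the patch itself still passes data= to client.post, which is what triggers the httpx DeprecationWarning visible in the log.txt diff below; content= is the non-deprecated spelling for raw bytes/text.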
diff --git a/litellm/tests/log.txt b/litellm/tests/log.txt
index c82f142963..fd9557c9b5 100644
--- a/litellm/tests/log.txt
+++ b/litellm/tests/log.txt
@@ -3,40 +3,9 @@ platform darwin -- Python 3.11.9, pytest-7.3.1, pluggy-1.3.0
 rootdir: /Users/krrishdholakia/Documents/litellm/litellm/tests
 plugins: timeout-2.2.0, asyncio-0.23.2, anyio-3.7.1, xdist-3.3.1
 asyncio: mode=Mode.STRICT
-collected 2 items
-
-test_streaming.py .Logging Details LiteLLM-Async Success Call
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-.Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Async success callbacks: Got a complete streaming response
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
- [100%]Logging Details LiteLLM-Success Call: None
-success callbacks: []
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Logging Details LiteLLM-Success Call streaming complete
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
+collected 1 item
+
+test_router_timeout.py .                                                 [100%]
 
 =============================== warnings summary ===============================
 ../../../../../../opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings
@@ -99,10 +68,9 @@ final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens
   /Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
     with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
 
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-False]
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-True]
+test_router_timeout.py::test_router_timeouts_bedrock
   /opt/homebrew/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
     warnings.warn(message, DeprecationWarning)
 
 -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
-======================== 2 passed, 41 warnings in 4.94s ========================
+======================== 1 passed, 40 warnings in 0.99s ========================
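The utils.py changes below do two things: the chain of independent if checks in the Bedrock branch of exception_type becomes a mutually exclusive elif chain, and new branches map HTTP status codes 408, 422, 429, 503, and 504 to litellm exceptions. Paraphrased as a lookup table (this dict is illustrative only, not code from the patch; the exception names are litellm's):

    # Status code -> litellm exception raised for Bedrock after this patch.
    BEDROCK_STATUS_TO_EXCEPTION = {
        408: "Timeout",                  # request timeout, raised by bedrock_httpx.py above
        422: "BadRequestError",          # malformed / unprocessable request
        429: "RateLimitError",           # throttled
        503: "ServiceUnavailableError",  # service unavailable
        504: "Timeout",                  # gateway timeout
    }

The 408 entry is what ties the two files together: bedrock_httpx.py raises BedrockError(status_code=408) on httpx.TimeoutException, and exception_type now converts that into litellm's Timeout. Note that the Timeout branches pass no response= argument, since no HTTP response exists when a timeout fires.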
diff --git a/litellm/utils.py b/litellm/utils.py
index facabd3aa9..800a9cdab6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8674,7 +8674,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "Malformed input request" in error_str:
+            elif "Malformed input request" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
                     message=f"BedrockException - {error_str}",
@@ -8682,7 +8682,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Unable to locate credentials" in error_str
                 or "The security token included in the request is invalid"
                 in error_str
@@ -8694,7 +8694,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "AccessDeniedException" in error_str:
+            elif "AccessDeniedException" in error_str:
                 exception_mapping_worked = True
                 raise PermissionDeniedError(
                     message=f"BedrockException PermissionDeniedError - {error_str}",
@@ -8702,7 +8702,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "throttlingException" in error_str
                 or "ThrottlingException" in error_str
             ):
@@ -8713,7 +8713,7 @@
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Connect timeout on endpoint URL" in error_str
                 or "timed out" in error_str
             ):
@@ -8723,7 +8723,7 @@
                     model=model,
                     llm_provider="bedrock",
                 )
-            if hasattr(original_exception, "status_code"):
+            elif hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
@@ -8761,6 +8761,49 @@
                         model=model,
                         response=original_exception.response,
                     )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise BadRequestError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 503:
+                    exception_mapping_worked = True
+                    raise ServiceUnavailableError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 504:  # gateway timeout error
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
         elif custom_llm_provider == "sagemaker":
             if "Unable to locate credentials" in error_str:
                 exception_mapping_worked = True