fix(bedrock_httpx.py): raise better timeout exception

Krrish Dholakia 2024-05-17 17:16:36 -07:00
parent b486bb1f8b
commit 4682802444
3 changed files with 65 additions and 45 deletions

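What this buys callers: a Bedrock request that hits its timeout now surfaces as a
typed timeout error rather than a generic API error. A minimal sketch of the
resulting behavior, assuming litellm's top-level completion API and exception
exports (not part of this diff):

    import litellm

    try:
        litellm.completion(
            model="bedrock/cohere.command-text-v14",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            timeout=0.01,  # deliberately tiny to force a client-side timeout
        )
    except litellm.Timeout:
        # with this commit: httpx.TimeoutException -> BedrockError(408) -> Timeout
        print("Bedrock request timed out")
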
litellm/llms/bedrock_httpx.py

@@ -859,13 +859,14 @@ class BedrockLLM(BaseLLM):
             )
             return streaming_response
 
-        response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
         try:
+            response = self.client.post(url=prepped.url, headers=prepped.headers, data=data)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
             raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
@@ -909,7 +910,15 @@ class BedrockLLM(BaseLLM):
         else:
             self.client = client  # type: ignore
 
-        response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+        try:
+            response = await self.client.post(api_base, headers=headers, data=data)  # type: ignore
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:
+            error_code = err.response.status_code
+            raise BedrockError(status_code=error_code, message=response.text)
+        except httpx.TimeoutException as e:
+            raise BedrockError(status_code=408, message="Timeout error occurred.")
 
         return self.process_response(
             model=model,
             response=response,
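
Both the sync and async paths now wrap the POST itself, so client-side timeouts are
converted into a BedrockError with status code 408 instead of escaping as raw httpx
errors. One detail worth noting (an observation about httpx, not part of the diff):
every httpx timeout variant derives from httpx.TimeoutException, so this single
except clause covers connect, read, write, and pool timeouts alike.

    import httpx

    # All four httpx timeout classes share the TimeoutException base caught above.
    for cls in (httpx.ConnectTimeout, httpx.ReadTimeout,
                httpx.WriteTimeout, httpx.PoolTimeout):
        assert issubclass(cls, httpx.TimeoutException)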

View file

@@ -3,40 +3,9 @@ platform darwin -- Python 3.11.9, pytest-7.3.1, pluggy-1.3.0
 rootdir: /Users/krrishdholakia/Documents/litellm/litellm/tests
 plugins: timeout-2.2.0, asyncio-0.23.2, anyio-3.7.1, xdist-3.3.1
 asyncio: mode=Mode.STRICT
-collected 2 items
+collected 1 item
 
-test_streaming.py .Logging Details LiteLLM-Async Success Call
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-.Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Async success callbacks: Got a complete streaming response
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-[100%]Logging Details LiteLLM-Success Call: None
-success callbacks: []
-Goes into checking if chunk has hiddden created at param
-Chunks have a created at hidden param
-Chunks sorted
-token_counter messages received: [{'content': 'Hello, how are you?', 'role': 'user'}]
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Token Counter - using generic token counter, for model=cohere.command-text-v14
-LiteLLM: Utils - Counting tokens for OpenAI model=gpt-3.5-turbo
-Logging Details LiteLLM-Success Call streaming complete
-Looking up model=cohere.command-text-v14 in model_cost_map
-Success: model=cohere.command-text-v14 in model_cost_map
-prompt_tokens=13; completion_tokens=10
-Returned custom cost for model=cohere.command-text-v14 - prompt_tokens_cost_usd_dollar: 1.95e-05, completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
-final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens_cost_usd_dollar: 1.9999999999999998e-05
+test_router_timeout.py . [100%]
 
 =============================== warnings summary ===============================
 ../../../../../../opt/homebrew/lib/python3.11/site-packages/pydantic/_internal/_config.py:284: 25 warnings
@@ -99,10 +68,9 @@ final cost: 3.95e-05; prompt_tokens_cost_usd_dollar: 1.95e-05; completion_tokens
 /Users/krrishdholakia/Documents/litellm/litellm/utils.py:60: DeprecationWarning: open_text is deprecated. Use files() instead. Refer to https://importlib-resources.readthedocs.io/en/latest/using.html#migrating-from-legacy for migration advice.
   with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
 
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-False]
-test_streaming.py::test_bedrock_httpx_streaming[cohere.command-text-v14-True]
+test_router_timeout.py::test_router_timeouts_bedrock
 /opt/homebrew/lib/python3.11/site-packages/httpx/_content.py:204: DeprecationWarning: Use 'content=<...>' to upload raw bytes/text content.
   warnings.warn(message, DeprecationWarning)
 
 -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
-======================== 2 passed, 41 warnings in 4.94s ========================
+======================== 1 passed, 40 warnings in 0.99s ========================
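
The committed test log now reflects test_router_timeout.py::test_router_timeouts_bedrock
instead of the streaming tests. The test body is not part of this diff; a hypothetical
sketch of the pattern such a test typically follows (the Router config and assertions
below are assumptions, not the actual test):

    import pytest
    import litellm
    from litellm import Router

    @pytest.mark.asyncio
    async def test_router_timeouts_bedrock():
        router = Router(
            model_list=[
                {
                    "model_name": "bedrock-cohere",
                    "litellm_params": {
                        "model": "bedrock/cohere.command-text-v14",
                        "timeout": 0.001,  # force a timeout
                    },
                }
            ]
        )
        with pytest.raises(litellm.Timeout):
            await router.acompletion(
                model="bedrock-cohere",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
            )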

litellm/utils.py

@@ -8674,7 +8674,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "Malformed input request" in error_str:
+            elif "Malformed input request" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
                     message=f"BedrockException - {error_str}",
@@ -8682,7 +8682,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Unable to locate credentials" in error_str
                 or "The security token included in the request is invalid"
                 in error_str
@@ -8694,7 +8694,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if "AccessDeniedException" in error_str:
+            elif "AccessDeniedException" in error_str:
                 exception_mapping_worked = True
                 raise PermissionDeniedError(
                     message=f"BedrockException PermissionDeniedError - {error_str}",
@@ -8702,7 +8702,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "throttlingException" in error_str
                 or "ThrottlingException" in error_str
             ):
@@ -8713,7 +8713,7 @@ def exception_type(
                     llm_provider="bedrock",
                     response=original_exception.response,
                 )
-            if (
+            elif (
                 "Connect timeout on endpoint URL" in error_str
                 or "timed out" in error_str
             ):
@@ -8723,7 +8723,7 @@ def exception_type(
                     model=model,
                     llm_provider="bedrock",
                 )
-            if hasattr(original_exception, "status_code"):
+            elif hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
@@ -8761,6 +8761,49 @@ def exception_type(
                         model=model,
                         response=original_exception.response,
                     )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise BadRequestError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 503:
+                    exception_mapping_worked = True
+                    raise ServiceUnavailableError(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        response=original_exception.response,
+                        litellm_debug_info=extra_information,
+                    )
+                elif original_exception.status_code == 504:  # gateway timeout error
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"BedrockException - {original_exception.message}",
+                        model=model,
+                        llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
+                    )
         elif custom_llm_provider == "sagemaker":
             if "Unable to locate credentials" in error_str:
                 exception_mapping_worked = True
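
Taken together, the new fallback gives the bedrock branch an explicit status-code
mapping once no error-string pattern has matched. A compact summary of what the
added block raises (an illustrative restatement of the diff above, assuming these
exception classes are importable from the litellm package):

    from litellm import (
        BadRequestError,
        RateLimitError,
        ServiceUnavailableError,
        Timeout,
    )

    # status code -> exception raised by the new bedrock fallback
    BEDROCK_STATUS_TO_EXCEPTION = {
        408: Timeout,                  # request timeout (raised by bedrock_httpx.py)
        422: BadRequestError,          # unprocessable / malformed input
        429: RateLimitError,           # throttling
        503: ServiceUnavailableError,  # service unavailable
        504: Timeout,                  # gateway timeout
    }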