diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc index 14fa58e19d..36517095ba 100644 Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py index fbde78334e..385e3a810a 100644 --- a/litellm/llms/huggingface_restapi.py +++ b/litellm/llms/huggingface_restapi.py @@ -94,7 +94,10 @@ class HuggingfaceRestAPILLM: additional_args={"complete_input_dict": data}, ) ## RESPONSE OBJECT - completion_response = response.json() + try: + completion_response = response.json() + except: + raise HuggingfaceError(message=response.text, status_code=response.status_code) print_verbose(f"response: {completion_response}") if isinstance(completion_response, dict) and "error" in completion_response: print_verbose(f"completion error: {completion_response['error']}") diff --git a/litellm/utils.py b/litellm/utils.py index ea54943fc0..5fc1755286 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1462,6 +1462,13 @@ def exception_type(model, original_exception, custom_llm_provider): llm_provider="cohere", ) elif custom_llm_provider == "huggingface": + if "length limit exceeded" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=error_str, + model=model, + llm_provider="huggingface" + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 401: exception_mapping_worked = True