Mirror of https://github.com/BerriAI/litellm.git
Synced 2025-04-26 11:14:04 +00:00

handle hf rate limit error

parent 664e3ed590
commit b987d99486

2 changed files with 11 additions and 0 deletions
@@ -3951,6 +3951,9 @@ def test_completion_hf_prompt_array():
         print(response.choices)
         assert len(response.choices) == 2
         # response_str = response["choices"][0]["text"]
+    except litellm.RateLimitError:
+        print("got rate limit error from hugging face... passing")
+        return
     except Exception as e:
         print(str(e))
         if "is currently loading" in str(e):
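For reference, this hunk makes the test tolerate Hugging Face throttling by catching litellm.RateLimitError before the generic handler. A minimal sketch of the same pattern in caller code, assuming an illustrative Hugging Face model id that is not taken from this commit:

import litellm

def call_hf_with_rate_limit_tolerance():
    # The model id below is only an example; any huggingface/* model routed
    # through litellm.completion() follows the same pattern.
    try:
        return litellm.completion(
            model="huggingface/bigcode/starcoder",
            messages=[{"role": "user", "content": "Hello"}],
        )
    except litellm.RateLimitError as e:
        # After this commit, HF "Rate limit reached" responses surface as a typed
        # RateLimitError, so callers can back off or skip instead of handling a
        # generic Exception.
        print(f"got rate limit error from hugging face: {e}")
        return None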
@@ -7455,6 +7455,14 @@ def exception_type(
                         model=model,
                         response=original_exception.response,
                     )
+                elif "Rate limit reached" in error_str:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=error_str,
+                        llm_provider="huggingface",
+                        model=model,
+                        response=original_exception.response,
+                    )
                 if hasattr(original_exception, "status_code"):
                     if original_exception.status_code == 401:
                         exception_mapping_worked = True
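The hunk above extends litellm's string-based error translation for the huggingface provider. A simplified sketch of that logic, with the surrounding exception_type() machinery omitted (the function name map_huggingface_error is invented for illustration; error_str stands for the provider's error text, as in the real function):

import litellm

def map_huggingface_error(original_exception, model):
    # Simplified illustration only; the real exception_type() covers many
    # providers and error branches.
    error_str = str(original_exception)
    if "Rate limit reached" in error_str:
        # New branch from this commit: surface HF throttling as a typed RateLimitError.
        raise litellm.RateLimitError(
            message=error_str,
            llm_provider="huggingface",
            model=model,
            response=original_exception.response,
        )
    # ...other huggingface mappings follow (e.g. the status_code == 401 branch
    # shown as context in the hunk); anything unmapped is re-raised as-is.
    raise original_exception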