handle hf rate limit error

2024-09-21 18:00:18 -07:00 · 2024-09-21 18:00:18 -07:00 · b987d99486
commit b987d99486
parent 664e3ed590
2 changed files with 11 additions and 0 deletions
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -3951,6 +3951,9 @@ def test_completion_hf_prompt_array():
        print(response.choices)
        assert len(response.choices) == 2
        # response_str = response["choices"][0]["text"]
+    except litellm.RateLimitError:
+        print("got rate limit error from hugging face... passsing")
+        return
    except Exception as e:
        print(str(e))
        if "is currently loading" in str(e):
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7455,6 +7455,14 @@ def exception_type(
                        model=model,
                        response=original_exception.response,
                    )
+                elif "Rate limit reached" in error_str:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=error_str,
+                        llm_provider="huggingface",
+                        model=model,
+                        response=original_exception.response,
+                    )
                if hasattr(original_exception, "status_code"):
                    if original_exception.status_code == 401:
                        exception_mapping_worked = True