Handle Hugging Face (hf) rate limit error

This commit is contained in:
Ishaan Jaff 2024-09-21 18:00:18 -07:00
parent 664e3ed590
commit b987d99486
2 changed files with 11 additions and 0 deletions

View file

@@ -3951,6 +3951,9 @@ def test_completion_hf_prompt_array():
print(response.choices)
assert len(response.choices) == 2
# response_str = response["choices"][0]["text"]
except litellm.RateLimitError:
print("got rate limit error from hugging face... passsing")
return
except Exception as e:
print(str(e))
if "is currently loading" in str(e):

View file

@@ -7455,6 +7455,14 @@ def exception_type(
model=model,
response=original_exception.response,
)
elif "Rate limit reached" in error_str:
exception_mapping_worked = True
raise RateLimitError(
message=error_str,
llm_provider="huggingface",
model=model,
response=original_exception.response,
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 401:
exception_mapping_worked = True