forked from phoenix/litellm-mirror
handle hf rate limit error
This commit is contained in:
parent
664e3ed590
commit
b987d99486
2 changed files with 11 additions and 0 deletions
|
@@ -3951,6 +3951,9 @@ def test_completion_hf_prompt_array():
|
|||
print(response.choices)
|
||||
assert len(response.choices) == 2
|
||||
# response_str = response["choices"][0]["text"]
|
||||
except litellm.RateLimitError:
|
||||
print("got rate limit error from hugging face... passsing")
|
||||
return
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
if "is currently loading" in str(e):
|
||||
|
|
|
@@ -7455,6 +7455,14 @@ def exception_type(
|
|||
model=model,
|
||||
response=original_exception.response,
|
||||
)
|
||||
elif "Rate limit reached" in error_str:
|
||||
exception_mapping_worked = True
|
||||
raise RateLimitError(
|
||||
message=error_str,
|
||||
llm_provider="huggingface",
|
||||
model=model,
|
||||
response=original_exception.response,
|
||||
)
|
||||
if hasattr(original_exception, "status_code"):
|
||||
if original_exception.status_code == 401:
|
||||
exception_mapping_worked = True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue