fix(main.py): don't pass stream to petals

This commit is contained in:
Krrish Dholakia 2023-11-29 19:57:06 -08:00
parent 9780efca4b
commit c312ac4ca8
3 changed files with 19 additions and 13 deletions

View file

@@ -403,7 +403,7 @@ class Router:
# if the function call is successful, no exception will be raised and we'll break out of the loop
response = await original_function(*args, **kwargs)
return response
except (Exception, asyncio.CancelledError) as e:
except Exception as e:
original_exception = e
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available
if ((isinstance(original_exception, litellm.ContextWindowExceededError) and context_window_fallbacks is None)
@@ -411,10 +411,7 @@ class Router:
raise original_exception
### RETRY
#### check if it should retry + back-off if required
if isinstance(original_exception, asyncio.CancelledError):
timeout = 0 # immediately retry
await asyncio.sleep(timeout)
elif hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
if hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
if hasattr(original_exception.response, "headers"):
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
else:
@@ -432,11 +429,8 @@ class Router:
response = await response
return response
except (Exception, asyncio.CancelledError) as e:
if isinstance(original_exception, asyncio.CancelledError):
timeout = 0 # immediately retry
await asyncio.sleep(timeout)
elif hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
except Exception as e:
if hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
remaining_retries = num_retries - current_attempt
if hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=e.response.headers)