diff --git a/litellm/main.py b/litellm/main.py
index e98d88656..ffe562641 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1313,11 +1313,12 @@ def completion(
             or model in litellm.petals_models
         ):
             api_base = (
-                litellm.api_base or
-                api_base
+                api_base or
+                litellm.api_base
             )
 
             custom_llm_provider = "petals"
+            stream = optional_params.pop("stream", False)
             model_response = petals.completion(
                 model=model,
                 messages=messages,
diff --git a/litellm/router.py b/litellm/router.py
index fab7e58c2..9135406fd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -403,7 +403,7 @@ class Router:
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = await original_function(*args, **kwargs)
             return response
-        except (Exception, asyncio.CancelledError) as e:
+        except Exception as e:
             original_exception = e
             ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available
             if ((isinstance(original_exception, litellm.ContextWindowExceededError) and context_window_fallbacks is None)
@@ -411,10 +411,7 @@ class Router:
                 raise original_exception
             ### RETRY
             #### check if it should retry + back-off if required
-            if isinstance(original_exception, asyncio.CancelledError):
-                timeout = 0 # immediately retry
-                await asyncio.sleep(timeout)
-            elif hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
+            if hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
                 if hasattr(original_exception.response, "headers"):
                     timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
                 else:
@@ -432,11 +429,8 @@ class Router:
                         response = await response
                     return response
 
-                except (Exception, asyncio.CancelledError) as e:
-                    if isinstance(original_exception, asyncio.CancelledError):
-                        timeout = 0 # immediately retry
-                        await asyncio.sleep(timeout)
-                    elif hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
+                except Exception as e:
+                    if hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
                         remaining_retries = num_retries - current_attempt
                         if hasattr(e.response, "headers"):
                             timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=e.response.headers)
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 597020942..1f51e54bb 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -631,7 +631,7 @@ def test_completion_bedrock_ai21_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_bedrock_ai21_stream()
+# test_completion_bedrock_ai21_stream()
 
 # def test_completion_sagemaker_stream():
 #     try:
@@ -760,6 +760,17 @@ def hf_test_completion_tgi_stream():
         pytest.fail(f"Error occurred: {e}")
 
 # hf_test_completion_tgi_stream()
+def test_petals():
+    print(f"making petals call")
+    response = completion(
+        model="petals/meta-llama/Llama-2-70b-chat-hf",
+        messages=[{ "content": "Hello, how are you?","role": "user"}],
+        # stream=True,
+        base_url="https://chat.petals.dev/api/v1/generate"
+    )
+
+    print(f"response: {response}")
+test_petals()
# def test_completion_aleph_alpha():
#     try:
#         response = completion(
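
Note on the router change above: with the `asyncio.CancelledError` fast-retry branch removed, retry decisions hinge entirely on the exception's `status_code` and `response.headers`. The sketch below is only an illustration of that general shape; `should_retry`, `retry_after`, and `call_with_retries` are hypothetical stand-ins, not the actual `litellm._should_retry` / `litellm._calculate_retry_after` implementations, and the retryable status codes are an assumption.

```python
import asyncio
import random

RETRYABLE_STATUS_CODES = {408, 429, 500, 502, 503, 504}  # assumption: typical transient errors

def should_retry(status_code: int) -> bool:
    # Stand-in for a retry predicate keyed purely off the HTTP status code.
    return status_code in RETRYABLE_STATUS_CODES

def retry_after(remaining_retries: int, max_retries: int, response_headers=None) -> float:
    # Prefer a server-provided Retry-After header when present.
    if response_headers and response_headers.get("retry-after"):
        try:
            return float(response_headers["retry-after"])
        except ValueError:
            pass
    # Otherwise use capped exponential back-off with a little jitter.
    attempt = max_retries - remaining_retries
    return min(2 ** attempt, 8) * (0.5 + random.random() / 2)

async def call_with_retries(fn, num_retries: int = 3):
    # Retry only when the raised exception carries a retryable status code.
    for current_attempt in range(num_retries + 1):
        try:
            return await fn()
        except Exception as e:
            status = getattr(e, "status_code", None)
            response = getattr(e, "response", None)
            if current_attempt == num_retries or status is None or not should_retry(status):
                raise
            headers = getattr(response, "headers", None)
            await asyncio.sleep(retry_after(num_retries - current_attempt, num_retries, headers))
```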