diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index fda9a8790..6ebbe2ec9 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 4021680ec..a9133ee9d 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index ec14064b0..5b1871b28 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -202,6 +202,7 @@ def completion(
     - If 'mock_response' is provided, a mock completion response is returned for testing or debugging.
     """
     ######### unpacking kwargs #####################
+    args = locals()
     return_async = kwargs.get('return_async', False)
     mock_response = kwargs.get('mock_response', None)
     api_key = kwargs.get('api_key', None)
@@ -216,9 +217,8 @@ def completion(
     metadata = kwargs.get('metadata', None)
     fallbacks = kwargs.get('fallbacks', [])
     ######## end of unpacking kwargs ###########
-    args = locals()
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "metadata"]
-    litellm_params = ["caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
+    litellm_params = ["acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
@@ -797,7 +797,7 @@ def completion(
                 logging_obj=logging
             )
             # fake palm streaming
-            if stream == True:
+            if "stream" in optional_params and optional_params["stream"] == True:
                 # fake streaming for palm
                 resp_string = model_response["choices"][0]["message"]["content"]
                 response = CustomStreamWrapper(
@@ -836,7 +836,6 @@ def completion(
                 if k not in optional_params:
                     optional_params[k] = v

-            print(f"optional_params: {optional_params}")
             ## LOGGING
             logging.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params})

@@ -979,7 +978,7 @@ def completion(
                 logging_obj=logging
             )

-            if stream==True: ## [BETA]
+            if "stream" in optional_params and optional_params["stream"]==True: ## [BETA]
                 # sagemaker does not support streaming as of now so we're faking streaming:
                 # https://discuss.huggingface.co/t/streaming-output-text-when-deploying-on-sagemaker/39611
                 # "SageMaker is currently not supporting streaming responses."
@@ -1009,7 +1008,7 @@ def completion(
             )

-            if stream == True:
+            if "stream" in optional_params and optional_params["stream"] == True:
                 # don't try to access stream object,
                 response = CustomStreamWrapper(
                     iter(model_response), model, custom_llm_provider="bedrock", logging_obj=logging
diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 4cb59a20c..80642ea01 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -19,10 +19,8 @@ async def test_get_response():
         response = await acompletion(model="gpt-3.5-turbo", messages=messages)
     except Exception as e:
         pass
-    return response
-
-# response = asyncio.run(test_get_response())
+response = asyncio.run(test_get_response())
 # print(response)

 @pytest.mark.asyncio
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index aa6c858f2..ba9390f16 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -47,7 +47,6 @@ def test_completion_claude():
         print(response.response_ms)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_claude()

 def test_completion_claude_max_tokens():
     try:
@@ -531,7 +530,7 @@ def test_completion_openai_with_more_optional_params():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-test_completion_openai_with_more_optional_params()
+# test_completion_openai_with_more_optional_params()
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #     {
@@ -916,7 +915,8 @@ def test_completion_bedrock_ai21():

 def test_completion_with_fallbacks():
-    fallbacks = ["gpt-3.5-turb", "gpt-3.5-turbo", "command-nightly"]
+    print(f"RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
+    fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"]
     try:
         response = completion(
             model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks
@@ -926,6 +926,7 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+test_completion_with_fallbacks()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 798ac457a..c03071362 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1072,13 +1072,15 @@ def get_optional_params( # use the openai defaults
         optional_params["stop"] = stop #TG AI expects a list, example ["\n\n\n\n","<|endoftext|>"]
     elif custom_llm_provider == "palm":
         ## check if unsupported param passed in
-        supported_params = ["temperature", "top_p"]
+        supported_params = ["temperature", "top_p", "stream"]
         _check_valid_arg(supported_params=supported_params)

         if temperature:
             optional_params["temperature"] = temperature
         if top_p:
             optional_params["top_p"] = top_p
+        if stream:
+            optional_params["stream"] = stream
     elif (
         custom_llm_provider == "vertex_ai"
     ):
@@ -1104,7 +1106,7 @@ def get_optional_params( # use the openai defaults
             return_full_text: If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.
             """
             ## check if unsupported param passed in
-            supported_params = ["temperature", "max_tokens"]
+            supported_params = ["temperature", "max_tokens", "stream"]
             _check_valid_arg(supported_params=supported_params)

             if max_tokens:
@@ -1113,13 +1115,15 @@ def get_optional_params( # use the openai defaults
                 optional_params["temperature"] = temperature
             if top_p:
                 optional_params["top_p"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         else:
             ## check if unsupported param passed in
             supported_params = []
             _check_valid_arg(supported_params=supported_params)
     elif custom_llm_provider == "bedrock":
         if "ai21" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[],
             # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
@@ -1131,8 +1135,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["stop_sequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
+            if stream:
+                optional_params["stream"] = stream
         elif "anthropic" in model:
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
@@ -1146,8 +1152,10 @@ def get_optional_params( # use the openai defaults
                 optional_params["top_p"] = top_p
             if stop:
                 optional_params["stop_sequences"] = stop
+            if stream:
+                optional_params["stream"] = stream
         elif "amazon" in model: # amazon titan llms
-            supported_params = ["max_tokens", "temperature", "stop", "top_p"]
+            supported_params = ["max_tokens", "temperature", "stop", "top_p", "stream"]
             _check_valid_arg(supported_params=supported_params)
             # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
             if max_tokens:
@@ -1158,7 +1166,8 @@ def get_optional_params( # use the openai defaults
                 optional_params["stopSequences"] = stop
             if top_p:
                 optional_params["topP"] = top_p
-
+            if stream:
+                optional_params["stream"] = stream
     elif model in litellm.aleph_alpha_models:
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
@@ -3431,13 +3440,15 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, overri
     return litellm.completion(model=selected_llm, messages=messages, use_client=use_client, **kwargs)

 def completion_with_fallbacks(**kwargs):
+    print(f"kwargs inside completion_with_fallbacks: {kwargs}")
+    nested_kwargs = kwargs.pop("kwargs")
     response = None
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
     original_model = kwargs["model"]
-    fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
-    del kwargs["fallbacks"] # remove fallbacks so it's not recursive
+    fallbacks = [kwargs["model"]] + nested_kwargs["fallbacks"]
+    del nested_kwargs["fallbacks"] # remove fallbacks so it's not recursive

     while response == None and time.time() - start_time < 45:
         for model in fallbacks:
@@ -3466,8 +3477,10 @@ def completion_with_fallbacks(**kwargs):
                 if kwargs.get("model"):
                     del kwargs["model"]

+                print(f"trying to make completion call with model: {model}")
+                kwargs = {**kwargs, **nested_kwargs} # combine the openai + litellm params at the same level
                 response = litellm.completion(**kwargs, model=model)
-
+                print(f"response: {response}")
                 if response != None:
                     return response
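
For reviewers, a minimal sketch (not part of the diff) of how the fallbacks path above is exercised from the caller's side, mirroring `test_completion_with_fallbacks`; the model names are illustrative assumptions and provider API keys are assumed to be set in the environment.

```python
# Sketch only: mirrors test_completion_with_fallbacks from this diff.
# Assumes OPENAI_API_KEY / COHERE_API_KEY are exported; model names are illustrative.
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# "bad-model" is expected to fail, so completion() hands off to
# completion_with_fallbacks(), which now pulls "fallbacks" out of the nested
# kwargs dict and retries each model (original model first) until one succeeds
# or the ~45s loop budget in completion_with_fallbacks runs out.
response = completion(
    model="bad-model",
    messages=messages,
    fallbacks=["gpt-3.5-turbo", "command-nightly"],
)
print(response["choices"][0]["message"]["content"])
```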
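Likewise, a hedged sketch of the streaming behaviour these changes enable: with `stream` now accepted by `get_optional_params` for palm/sagemaker/bedrock, `completion(..., stream=True)` returns a `CustomStreamWrapper` that fakes streaming by chunking the full response. The Bedrock model id below is an assumption, and AWS credentials are expected to be configured.

```python
# Sketch only: consuming the fake-streaming path enabled above.
# The model id is illustrative; AWS credentials for Bedrock are assumed.
from litellm import completion

messages = [{"role": "user", "content": "Write one line about rivers."}]

response = completion(
    model="anthropic.claude-instant-v1",  # assumed Bedrock model id
    custom_llm_provider="bedrock",
    messages=messages,
    stream=True,
)

# CustomStreamWrapper is iterable, so the (fake) stream is consumed like a real one.
for chunk in response:
    print(chunk)
```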