diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index c3290d45bf..c52cbbcd38
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 0f7870fb4c..a9c406148e
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py
index 8b4bed0615..e994fa235d 100644
--- a/litellm/llms/cohere.py
+++ b/litellm/llms/cohere.py
@@ -163,7 +163,7 @@ def completion(
                 choices_list.append(choice_obj)
             model_response["choices"] = choices_list
         except Exception as e:
-            raise CohereError(message=traceback.format_exc(), status_code=response.status_code)
+            raise CohereError(message=response.text, status_code=response.status_code)

         ## CALCULATING USAGE
         prompt_tokens = len(
diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py
index 5205a6cb50..12eb3ac4c3 100644
--- a/litellm/llms/palm.py
+++ b/litellm/llms/palm.py
@@ -1,4 +1,4 @@
-import os, types, traceback
+import os, types, traceback, copy
 import json
 from enum import Enum
 import time
@@ -87,10 +87,12 @@ def completion(
     model = model

     ## Load Config
+    inference_params = copy.deepcopy(optional_params)
+    inference_params.pop("stream") # palm does not support streaming, so we handle this by fake streaming in main.py
     config = litellm.PalmConfig.get_config()
     for k, v in config.items():
-        if k not in optional_params: # completion(top_k=3) > palm_config(top_k=3) <- allows for dynamic variables to be passed in
-            optional_params[k] = v
+        if k not in inference_params: # completion(top_k=3) > palm_config(top_k=3) <- allows for dynamic variables to be passed in
+            inference_params[k] = v

     prompt = ""
     for message in messages:
@@ -110,11 +112,11 @@ def completion(
     logging_obj.pre_call(
             input=prompt,
             api_key="",
-            additional_args={"complete_input_dict": {"optional_params": optional_params}},
+            additional_args={"complete_input_dict": {"inference_params": inference_params}},
         )
     ## COMPLETION CALL
     try:
-        response = palm.generate_text(prompt=prompt, **optional_params)
+        response = palm.generate_text(prompt=prompt, **inference_params)
     except Exception as e:
         raise PalmError(
             message=str(e),
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index b3a2626380..8a29950a2d 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -45,7 +45,7 @@ def test_context_window(model):
     with pytest.raises(ContextWindowExceededError):
         completion(model=model, messages=messages)

-test_context_window(model="gpt-3.5-turbo")
+# test_context_window(model="command-nightly")
 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
 def invalid_auth(model):  # set the model key to an invalid key, depending on the model
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 8b2e9b9520..fc46bb17e6 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -453,7 +453,7 @@ def test_completion_palm_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-# test_completion_palm_stream()
+test_completion_palm_stream()

 # def test_completion_deep_infra_stream():
 #     # deep infra currently includes role in the 2nd chunk
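
Reviewer note: the cohere.py change swaps traceback.format_exc() for response.text in the raised CohereError, so callers see the provider's own error body instead of a litellm-internal stack trace. A minimal sketch of the idea, assuming a requests.Response and a CohereError shaped like the one constructed in the diff (raise_provider_error is a hypothetical helper, not a litellm function):

import requests

class CohereError(Exception):
    # Assumed shape, matching how the diff constructs CohereError.
    def __init__(self, message: str, status_code: int):
        self.status_code = status_code
        super().__init__(message)

def raise_provider_error(response: requests.Response) -> None:
    # response.text carries Cohere's own explanation (bad key, bad input,
    # rate limit), which is more actionable for users than a traceback
    # pointing at litellm's response-parsing code.
    raise CohereError(message=response.text, status_code=response.status_code)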
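
Reviewer note: the palm.py change deep-copies optional_params before mutating it, so the caller's dict (which main.py still needs, e.g. to detect "stream" and fake streaming) is never modified, and "stream" is stripped before the PaLM SDK call. A minimal sketch of the pattern with hypothetical names (build_inference_params, provider_config); it also uses pop("stream", None) as a defensive variant, since the diff's bare pop("stream") would raise KeyError when no stream key is present:

import copy

def build_inference_params(optional_params: dict, provider_config: dict) -> dict:
    # Deep-copy so the shared kwargs dict passed in by the caller stays untouched.
    inference_params = copy.deepcopy(optional_params)
    # PaLM does not support streaming; streaming is faked upstream, so strip
    # the key before forwarding kwargs to the SDK. The default avoids KeyError.
    inference_params.pop("stream", None)
    # Config values act as defaults only: explicit completion() kwargs win,
    # mirroring the "completion(top_k=3) > palm_config(top_k=3)" rule in the diff.
    for k, v in provider_config.items():
        if k not in inference_params:
            inference_params[k] = v
    return inference_params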