diff --git a/docs/my-website/docs/completion/input.md b/docs/my-website/docs/completion/input.md index 0eebb13a7a..892cc17c6b 100644 --- a/docs/my-website/docs/completion/input.md +++ b/docs/my-website/docs/completion/input.md @@ -32,7 +32,7 @@ This list is constantly being updated. |---|---|---|---|---|---|---|---|---|---|---| |Anthropic| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |OpenAI| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -|Replicate | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | +|Replicate | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |Cohere| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | |Huggingface| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |Openrouter| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | @@ -43,7 +43,7 @@ This list is constantly being updated. |TogetherAI| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |AlephAlpha| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | |Palm| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | -|NLP Cloud| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | +|NLP Cloud| ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | |Petals| ✅ | ✅ | | ✅ | | | | | | | |Ollama| ✅ | ✅ | ✅ | ✅ | ✅ | | | ✅ | | |n diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc index 1f1eab4bb5..d3da8727b3 100644 Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py index 3bf4c10044..6991dcedf9 100644 --- a/litellm/llms/ai21.py +++ b/litellm/llms/ai21.py @@ -1,10 +1,10 @@ -import os, types +import os, types, traceback import json from enum import Enum import requests import time from typing import Callable, Optional -from litellm.utils import ModelResponse +from litellm.utils import ModelResponse, Choices, Message import litellm class AI21Error(Exception): @@ -159,10 +159,14 @@ def completion( ) else: try: - model_response["choices"][0]["message"]["content"] = completion_response["completions"][0]["data"]["text"] - model_response.choices[0].finish_reason = completion_response["completions"][0]["finishReason"]["reason"] + choices_list = [] + for idx, item in enumerate(completion_response["completions"]): + message_obj = Message(content=item["data"]["text"]) + choice_obj = Choices(finish_reason=item["finishReason"]["reason"], index=idx+1, message=message_obj) + choices_list.append(choice_obj) + model_response["choices"] = choices_list except Exception as e: - raise AI21Error(message=json.dumps(completion_response), status_code=response.status_code) + raise AI21Error(message=traceback.format_exc(), status_code=response.status_code) ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
prompt_tokens = len( diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py index dbd0c0ea8d..8b4bed0615 100644 --- a/litellm/llms/cohere.py +++ b/litellm/llms/cohere.py @@ -2,9 +2,9 @@ import os, types import json from enum import Enum import requests -import time +import time, traceback from typing import Callable, Optional -from litellm.utils import ModelResponse +from litellm.utils import ModelResponse, Choices, Message import litellm class CohereError(Exception): @@ -156,11 +156,16 @@ def completion( ) else: try: - model_response["choices"][0]["message"]["content"] = completion_response["generations"][0]["text"] - except: - raise CohereError(message=json.dumps(completion_response), status_code=response.status_code) + choices_list = [] + for idx, item in enumerate(completion_response["generations"]): + message_obj = Message(content=item["text"]) + choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj) + choices_list.append(choice_obj) + model_response["choices"] = choices_list + except Exception as e: + raise CohereError(message=traceback.format_exc(), status_code=response.status_code) - ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. + ## CALCULATING USAGE prompt_tokens = len( encoding.encode(prompt) ) diff --git a/litellm/llms/palm.py b/litellm/llms/palm.py index 21e1f55421..5205a6cb50 100644 --- a/litellm/llms/palm.py +++ b/litellm/llms/palm.py @@ -1,9 +1,9 @@ -import os, types +import os, types, traceback import json from enum import Enum import time from typing import Callable, Optional -from litellm.utils import ModelResponse, get_secret +from litellm.utils import ModelResponse, get_secret, Choices, Message import litellm import sys @@ -33,7 +33,7 @@ class PalmConfig(): - `top_p` (float): The API uses combined nucleus and top-k sampling. `top_p` configures the nucleus sampling. It sets the maximum cumulative probability of tokens to sample from. 
- - `maxOutputTokens` (int): Sets the maximum number of tokens to be returned in the output + - `max_output_tokens` (int): Sets the maximum number of tokens to be returned in the output """ context: Optional[str]=None examples: Optional[list]=None @@ -41,7 +41,7 @@ class PalmConfig(): candidate_count: Optional[int]=None top_k: Optional[int]=None top_p: Optional[float]=None - maxOutputTokens: Optional[int]=None + max_output_tokens: Optional[int]=None def __init__(self, context: Optional[str]=None, @@ -50,7 +50,7 @@ class PalmConfig(): candidate_count: Optional[int]=None, top_k: Optional[int]=None, top_p: Optional[float]=None, - maxOutputTokens: Optional[int]=None) -> None: + max_output_tokens: Optional[int]=None) -> None: locals_ = locals() for key, value in locals_.items(): @@ -110,10 +110,16 @@ def completion( logging_obj.pre_call( input=prompt, api_key="", - additional_args={"complete_input_dict": {}}, + additional_args={"complete_input_dict": {"optional_params": optional_params}}, ) ## COMPLETION CALL - response = palm.chat(messages=prompt) + try: + response = palm.generate_text(prompt=prompt, **optional_params) + except Exception as e: + raise PalmError( + message=str(e), + status_code=500, + ) ## LOGGING logging_obj.post_call( @@ -124,18 +130,17 @@ def completion( ) print_verbose(f"raw model_response: {response}") ## RESPONSE OBJECT - completion_response = response.last - - if "error" in completion_response: - raise PalmError( - message=completion_response["error"], - status_code=response.status_code, - ) - else: - try: - model_response["choices"][0]["message"]["content"] = completion_response - except: - raise PalmError(message=json.dumps(completion_response), status_code=response.status_code) + completion_response = response + try: + choices_list = [] + for idx, item in enumerate(completion_response.candidates): + message_obj = Message(content=item["output"]) + choice_obj = Choices(index=idx+1, message=message_obj) + choices_list.append(choice_obj) + model_response["choices"] = choices_list + except Exception as e: + traceback.print_exc() + raise PalmError(message=traceback.format_exc(), status_code=response.status_code) ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. prompt_tokens = len( diff --git a/litellm/llms/together_ai.py b/litellm/llms/together_ai.py index e5b5835240..6a134e2bda 100644 --- a/litellm/llms/together_ai.py +++ b/litellm/llms/together_ai.py @@ -161,7 +161,7 @@ def completion( raise TogetherAIError( message=json.dumps(completion_response["output"]), status_code=response.status_code ) - + completion_text = completion_response["output"]["choices"][0]["text"] ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
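The `ai21.py`, `cohere.py`, and `palm.py` hunks above all apply the same fix: instead of writing a single string into `choices[0]`, they build one `Choices`/`Message` pair per generation returned by the provider, so requests with `n > 1` surface every candidate. A minimal sketch of that mapping, reusing the `Choices` and `Message` constructors imported from `litellm.utils` in this diff (the `completion_response` shape here mirrors the Cohere payload and is illustrative only):

```python
# Illustrative sketch of the per-generation mapping used in the hunks above.
# Assumes the Choices/Message constructors from litellm.utils as imported in
# this diff; `completion_response` mimics a Cohere-style payload.
from litellm.utils import Choices, Message, ModelResponse

def map_generations(completion_response: dict, model_response: ModelResponse) -> ModelResponse:
    choices_list = []
    for idx, item in enumerate(completion_response["generations"]):
        message_obj = Message(content=item["text"])
        choice_obj = Choices(
            finish_reason=item["finish_reason"],
            index=idx + 1,  # the hunks above number choices from 1
            message=message_obj,
        )
        choices_list.append(choice_obj)
    model_response["choices"] = choices_list
    return model_response
```

Note that these hunks index choices starting at 1 (`idx + 1`), whereas OpenAI responses index from 0; callers that iterate `response.choices` positionally are unaffected either way.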
diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py index bc4bef506c..9cf7ecb045 100644 --- a/litellm/tests/test_provider_specific_config.py +++ b/litellm/tests/test_provider_specific_config.py @@ -50,7 +50,7 @@ def claude_test_completion(): try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( - model="together_ai/togethercomputer/llama-2-70b-chat", + model="claude-instant-1", messages=[{ "content": "Hello, how are you?","role": "user"}], max_tokens=10 ) @@ -60,7 +60,7 @@ def claude_test_completion(): # USE CONFIG TOKENS response_2 = litellm.completion( - model="together_ai/togethercomputer/llama-2-70b-chat", + model="claude-instant-1", messages=[{ "content": "Hello, how are you?","role": "user"}], ) # Add any assertions here to check the response @@ -68,6 +68,14 @@ def claude_test_completion(): response_2_text = response_2.choices[0].message.content assert len(response_2_text) > len(response_1_text) + + try: + response_3 = litellm.completion(model="claude-instant-1", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + + except Exception as e: + print(e) except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -99,6 +107,12 @@ def replicate_test_completion(): response_2_text = response_2.choices[0].message.content assert len(response_2_text) > len(response_1_text) + try: + response_3 = litellm.completion(model="meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + except: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -107,8 +121,8 @@ def replicate_test_completion(): # Cohere def cohere_test_completion(): - litellm.CohereConfig(max_tokens=200) - # litellm.set_verbose=True + # litellm.CohereConfig(max_tokens=200) + litellm.set_verbose=True try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( @@ -126,6 +140,11 @@ def cohere_test_completion(): response_2_text = response_2.choices[0].message.content assert len(response_2_text) > len(response_1_text) + + response_3 = litellm.completion(model="command-nightly", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + assert len(response_3.choices) > 1 except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -135,7 +154,7 @@ def cohere_test_completion(): def ai21_test_completion(): litellm.AI21Config(maxTokens=10) - # litellm.set_verbose=True + litellm.set_verbose=True try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( @@ -155,6 +174,11 @@ def ai21_test_completion(): print(f"response_2_text: {response_2_text}") assert len(response_2_text) < len(response_1_text) + + response_3 = litellm.completion(model="j2-light", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + assert len(response_3.choices) > 1 except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -164,7 +188,7 @@ def ai21_test_completion(): def togetherai_test_completion(): litellm.TogetherAIConfig(max_tokens=10) - # litellm.set_verbose=True + litellm.set_verbose=True try: # OVERRIDE WITH DYNAMIC MAX TOKENS response_1 = litellm.completion( @@ -184,6 +208,14 @@ def togetherai_test_completion(): print(f"response_2_text: {response_2_text}") assert len(response_2_text) < len(response_1_text) + + try: + response_3 = litellm.completion(model="together_ai/togethercomputer/llama-2-70b-chat", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + 
pytest.fail(f"Error not raised when n=2 passed to provider") + except: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -192,7 +224,7 @@ def togetherai_test_completion(): # Palm def palm_test_completion(): - litellm.PalmConfig(maxOutputTokens=10) + litellm.PalmConfig(max_output_tokens=10, temperature=0.9) # litellm.set_verbose=True try: # OVERRIDE WITH DYNAMIC MAX TOKENS @@ -213,6 +245,11 @@ def palm_test_completion(): print(f"response_2_text: {response_2_text}") assert len(response_2_text) < len(response_1_text) + + response_3 = litellm.completion(model="palm/chat-bison", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + assert len(response_3.choices) > 1 except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -242,6 +279,14 @@ def nlp_cloud_test_completion(): print(f"response_2_text: {response_2_text}") assert len(response_2_text) < len(response_1_text) + + try: + response_3 = litellm.completion(model="dolphin", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + pytest.fail(f"Error not raised when n=2 passed to provider") + except: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") @@ -271,6 +316,14 @@ def aleph_alpha_test_completion(): print(f"response_2_text: {response_2_text}") assert len(response_2_text) < len(response_1_text) + + try: + response_3 = litellm.completion(model="luminous-base", + messages=[{ "content": "Hello, how are you?","role": "user"}], + n=2) + pytest.fail(f"Error not raised when n=2 passed to provider") + except: + pass except Exception as e: pytest.fail(f"Error occurred: {e}") diff --git a/litellm/utils.py b/litellm/utils.py index 3bdf519180..a4ffa79180 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -97,6 +97,15 @@ last_fetched_at_keys = None # 'usage': {'prompt_tokens': 18, 'completion_tokens': 23, 'total_tokens': 41} # } +class UnsupportedParamsError(Exception): + def __init__(self, status_code, message): + self.status_code = status_code + self.message = message + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs + + def _generate_id(): # private helper function return 'chatcmpl-' + str(uuid.uuid4()) @@ -1008,7 +1017,7 @@ def get_optional_params( # use the openai defaults if litellm.add_function_to_prompt: # if user opts to add it to prompt instead optional_params["functions_unsupported_model"] = non_default_params.pop("functions") else: - raise ValueError(f"LiteLLM.Exception: Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.") + raise UnsupportedParamsError(status_code=500, message=f"Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.") def _check_valid_arg(supported_params): print_verbose(f"checking params for {model}") @@ -1025,7 +1034,7 @@ def get_optional_params( # use the openai defaults else: unsupported_params[k] = non_default_params[k] if unsupported_params and not litellm.drop_params: - raise ValueError(f"LiteLLM.Exception: {custom_llm_provider} does not support parameters: {unsupported_params}. To drop these, set `litellm.drop_params=True`.") + raise UnsupportedParamsError(status_code=500, message=f"{custom_llm_provider} does not support parameters: {unsupported_params}. 
To drop these, set `litellm.drop_params=True`.") ## raise exception if provider doesn't support passed in param if custom_llm_provider == "anthropic": @@ -1163,7 +1172,7 @@ def get_optional_params( # use the openai defaults if stop: optional_params["stopSequences"] = stop if max_tokens: - optional_params["maxOutputTokens"] = max_tokens + optional_params["max_output_tokens"] = max_tokens elif ( custom_llm_provider == "vertex_ai" ):
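The `utils.py` hunks also make `get_optional_params` raise a typed `UnsupportedParamsError` (carrying a status code and message) instead of a bare `ValueError`; the new `n=2` test cases above exercise this path for providers that do not support multiple generations (TogetherAI, NLP Cloud, Aleph Alpha are expected to raise, while Cohere, AI21, and PaLM now return multiple choices). A hedged sketch of how a caller might distinguish that case, assuming the exception propagates to the caller unwrapped at this commit (litellm's exception mapping may re-wrap it in practice) and that valid provider credentials are configured:

```python
# Sketch only: catching the new typed error for an unsupported parameter.
# Assumes UnsupportedParamsError is importable from litellm.utils as added
# in this diff and that it reaches the caller unwrapped.
import litellm
from litellm.utils import UnsupportedParamsError

try:
    litellm.completion(
        model="together_ai/togethercomputer/llama-2-70b-chat",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        n=2,  # TogetherAI does not accept n, per the tests above
    )
except UnsupportedParamsError as e:
    print(f"unsupported params ({e.status_code}): {e.message}")

# Alternatively, silently drop unsupported parameters instead of raising:
litellm.drop_params = True
```

The same file also renames the PaLM mapping from `maxOutputTokens` to `max_output_tokens`, matching the keyword passed through to `palm.generate_text(prompt=prompt, **optional_params)` in the `palm.py` hunk, which is why `PalmConfig` and the test `litellm.PalmConfig(max_output_tokens=10, temperature=0.9)` now use the snake_case name.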