diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6459cd2d8..9cef80ae0 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 90abdb2d2..0739d1e92 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -117,7 +117,6 @@ def completion(
         inference_params = copy.deepcopy(optional_params)
         inference_params.pop("details")
         inference_params.pop("return_full_text")
-        inference_params.pop("task")
         past_user_inputs = []
         generated_responses = []
         text = ""
@@ -181,7 +180,6 @@ def completion(
         inference_params = copy.deepcopy(optional_params)
         inference_params.pop("details")
         inference_params.pop("return_full_text")
-        inference_params.pop("task")
         data = {
             "inputs": prompt,
             "parameters": inference_params,
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index f016ae97d..1bf8eadf9 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -351,7 +351,7 @@ def test_completion_cohere_stream_bad_key():
 #     },
 # ]
 # response = completion(
-#     model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", messages=messages, api_base="https://n9ox93a8sv5ihsow.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000
+#     model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", messages=messages, api_base="https://n9ox93a8sv5ihsow.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, n=1
 # )
 # complete_response = ""
 # # Add any assertions here to check the response
diff --git a/litellm/utils.py b/litellm/utils.py
index b7995863d..122659829 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1039,7 +1039,7 @@ def get_optional_params(  # use the openai defaults
             optional_params["stop_sequences"] = stop
     elif custom_llm_provider == "huggingface":
         ## check if unsupported param passed in
-        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop",]
+        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
         _check_valid_arg(supported_params=supported_params)
 
         if temperature:
@@ -1055,6 +1055,8 @@ def get_optional_params(  # use the openai defaults
             optional_params["stop"] = stop
         if max_tokens:
             optional_params["max_new_tokens"] = max_tokens
+        if n:
+            optional_params["best_of"] = n
         if presence_penalty:
             optional_params["repetition_penalty"] = presence_penalty
     elif custom_llm_provider == "together_ai":
diff --git a/pyproject.toml b/pyproject.toml
index eedbb81a7..1bc0aa326 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.815"
+version = "0.1.816"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
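
Note on the utils.py change above: after this patch, the huggingface provider accepts the OpenAI-style n parameter and forwards it to the Hugging Face inference backend as best_of. Below is a minimal usage sketch, not part of this diff; the model name comes from the commented-out streaming test above, the prompt and max_tokens value are illustrative, and the private api_base from the test is omitted.

# Sketch only: shows n= being accepted for a huggingface/ model after this patch.
# Internally, get_optional_params maps n -> optional_params["best_of"] and
# max_tokens -> "max_new_tokens" for the huggingface provider.
import litellm

response = litellm.completion(
    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",  # model from the test file above
    messages=[{"role": "user", "content": "Say hello"}],     # illustrative prompt
    n=1,            # forwarded to the HF backend as "best_of"
    max_tokens=100, # forwarded as "max_new_tokens"
)
print(response)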