diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index d4386ac9c..45a2b185e 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index b26f9b7a7..2f5100f88 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index e97c50874..e2fccb569 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -40,7 +40,9 @@ def completion(
     logger_fn=None,
 ):
     headers = validate_environment(api_key)
+    task = optional_params.pop("task")
     completion_url = ""
+    input_text = None
     if "https" in model:
         completion_url = model
     elif api_base:
@@ -49,37 +51,62 @@ def completion(
         completion_url = os.getenv("HF_API_BASE", "")
     else:
         completion_url = f"https://api-inference.huggingface.co/models/{model}"
-    if model in custom_prompt_dict:
-        # check if the model has a registered custom prompt
-        model_prompt_details = custom_prompt_dict[model]
-        prompt = custom_prompt(
-            role_dict=model_prompt_details["roles"],
-            initial_prompt_value=model_prompt_details["initial_prompt_value"],
-            final_prompt_value=model_prompt_details["final_prompt_value"],
-            messages=messages
-        )
-    else:
-        prompt = prompt_factory(model=model, messages=messages)
+    ### MAP INPUT PARAMS
-    if "https://api-inference.huggingface.co/models" in completion_url:
+    if task == "conversational":
         inference_params = copy.deepcopy(optional_params)
         inference_params.pop("details")
+        past_user_inputs = []
+        generated_responses = []
+        text = ""
+        for message in messages:
+            if message["role"] == "user":
+                if text != "":
+                    past_user_inputs.append(text)
+                text = message["content"]
+            elif message["role"] == "assistant" or message["role"] == "system":
+                generated_responses.append(message["content"])
         data = {
-            "inputs": prompt,
-            "parameters": inference_params,
-            "stream": True if "stream" in inference_params and inference_params["stream"] == True else False,
-        }
-    else:
-        data = {
-            "inputs": prompt,
-            "parameters": optional_params,
-            "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
+            "inputs": {
+                "text": text,
+                "past_user_inputs": past_user_inputs,
+                "generated_responses": generated_responses
+            },
+            "parameters": inference_params
         }
+        input_text = "".join(message["content"] for message in messages)
+    elif task == "text-generation-inference":
+        if model in custom_prompt_dict:
+            # check if the model has a registered custom prompt
+            model_prompt_details = custom_prompt_dict[model]
+            prompt = custom_prompt(
+                role_dict=model_prompt_details["roles"],
+                initial_prompt_value=model_prompt_details["initial_prompt_value"],
+                final_prompt_value=model_prompt_details["final_prompt_value"],
+                messages=messages
+            )
+        else:
+            prompt = prompt_factory(model=model, messages=messages)
+        if "https://api-inference.huggingface.co/models" in completion_url:
+            inference_params = copy.deepcopy(optional_params)
+            inference_params.pop("details")
+            data = {
+                "inputs": prompt,
+                "parameters": inference_params,
+                "stream": True if "stream" in inference_params and inference_params["stream"] == True else False,
+            }
+        else:
+            data = {
+                "inputs": prompt,
+                "parameters": optional_params,
+                "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
+            }
+        input_text = prompt

     ## LOGGING
     logging_obj.pre_call(
-        input=prompt,
+        input=input_text,
         api_key=api_key,
-        additional_args={"complete_input_dict": data},
+        additional_args={"complete_input_dict": data, "task": task},
     )
     ## COMPLETION CALL
     if "stream" in optional_params and optional_params["stream"] == True:
@@ -98,10 +125,10 @@ def completion(
         )
         ## LOGGING
         logging_obj.post_call(
-            input=prompt,
+            input=input_text,
             api_key=api_key,
             original_response=response.text,
-            additional_args={"complete_input_dict": data},
+            additional_args={"complete_input_dict": data, "task": task},
         )
         ## RESPONSE OBJECT
         try:
@@ -119,19 +146,23 @@ def completion(
                 status_code=response.status_code,
             )
         else:
-            model_response["choices"][0]["message"][
-                "content"
-            ] = completion_response[0]["generated_text"]
-
-            ## GETTING LOGPROBS
-            if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
-                sum_logprob = 0
-                for token in completion_response[0]["details"]["tokens"]:
-                    sum_logprob += token["logprob"]
-                model_response["choices"][0]["message"]["logprobs"] = sum_logprob
+            if task == "conversational":
+                model_response["choices"][0]["message"][
+                    "content"
+                ] = completion_response["generated_text"]
+            elif task == "text-generation-inference":
+                model_response["choices"][0]["message"][
+                    "content"
+                ] = completion_response[0]["generated_text"]
+                ## GETTING LOGPROBS
+                if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
+                    sum_logprob = 0
+                    for token in completion_response[0]["details"]["tokens"]:
+                        sum_logprob += token["logprob"]
+                    model_response["choices"][0]["message"]["logprobs"] = sum_logprob
         ## CALCULATING USAGE
         prompt_tokens = len(
-            encoding.encode(prompt)
+            encoding.encode(input_text)
         )  ##[TODO] use the llama2 tokenizer here
         completion_tokens = len(
             encoding.encode(model_response["choices"][0]["message"]["content"])
diff --git a/litellm/main.py b/litellm/main.py
index a0b233861..a7d9d627b 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -109,8 +109,8 @@ def completion(
     use_client=False,
     id=None, # this is an optional param to tag individual completion calls
     # model specific optional params
-    # used by text-bison only
-    top_k=40,
+    top_k=40,# used by text-bison only
+    task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
     caching = False,
@@ -154,6 +154,7 @@ def completion(
         model=model,
         custom_llm_provider=custom_llm_provider,
         top_k=top_k,
+        task=task
     )
     # For logging - save the values of the litellm-specific params passed in
     litellm_params = get_litellm_params(
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index d1d9a2258..6be7f24d3 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -119,7 +119,8 @@ def test_completion_claude_stream():
 # try:
 #     user_message = "write some code to find the sum of two numbers"
 #     messages = [{ "content": user_message,"role": "user"}]
-#     response = completion(model="stabilityai/stablecode-instruct-alpha-3b", messages=messages, custom_llm_provider="huggingface", logger_fn=logger_fn)
+#     api_base = "https://wyh9bqfgj2r1klv5.us-east-1.aws.endpoints.huggingface.cloud"
+#     response = completion(model="facebook/blenderbot-400M-distill", messages=messages, custom_llm_provider="huggingface", task="conversational", api_base=api_base, logger_fn=logger_fn)
 #     # Add any assertions here to check the response
 #     print(response)
 # except Exception as e:
diff --git a/litellm/utils.py b/litellm/utils.py
index d91723eed..fe3efe06b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -788,6 +788,7 @@ def get_optional_params(  # use the openai defaults
     model=None,
     custom_llm_provider="",
     top_k=40,
+    task=None
 ):
     optional_params = {}
     if model in litellm.anthropic_models:
@@ -882,6 +883,7 @@ def get_optional_params(  # use the openai defaults
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
         optional_params["details"] = True
+        optional_params["task"] = task
     elif custom_llm_provider == "sagemaker":
         if "llama-2" in model: # llama-2 models on sagemaker support the following args
diff --git a/pyproject.toml b/pyproject.toml
index f2c60d417..cca9d73fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.618"
+version = "0.1.619"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
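
Usage sketch (not part of the diff): a minimal example of calling completion() with the new task parameter this patch introduces, adapted from the commented-out test above. The inference-endpoint URL and the second model id are illustrative placeholders, and a Hugging Face API token is assumed to be configured in the environment.

# Minimal sketch, assuming litellm 0.1.619 with this patch and a Hugging Face
# API token exported in the environment (e.g. HUGGINGFACE_API_KEY).
from litellm import completion

messages = [{"role": "user", "content": "write some code to find the sum of two numbers"}]

# task="conversational": messages are mapped to the Inference API's
# text / past_user_inputs / generated_responses payload.
response = completion(
    model="facebook/blenderbot-400M-distill",
    messages=messages,
    custom_llm_provider="huggingface",
    task="conversational",
    api_base="https://<your-endpoint>.endpoints.huggingface.cloud",  # placeholder endpoint URL
)
print(response)

# Default task ("text-generation-inference") keeps the existing prompt-based payload.
response = completion(
    model="meta-llama/Llama-2-7b-chat-hf",  # illustrative model id
    messages=messages,
    custom_llm_provider="huggingface",
)
print(response)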