diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 43c293a7b4..de1781ee0d 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -102,8 +102,7 @@ def completion(
                 "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
             }
             input_text = prompt
-        elif task == "other":
-            print("task=other, custom api base")
+        elif task == "other" or task == None:
             if model in custom_prompt_dict:
                 # check if the model has a registered custom prompt
                 model_prompt_details = custom_prompt_dict[model]
@@ -118,8 +117,6 @@ def completion(
             inference_params = copy.deepcopy(optional_params)
             inference_params.pop("details")
             inference_params.pop("return_full_text")
-            print("inf params")
-            print(inference_params)
             data = {
                 "inputs": prompt,
                 "parameters": inference_params,
@@ -185,8 +182,8 @@ def completion(
                 for token in completion_response[0]["details"]["tokens"]:
                     sum_logprob += token["logprob"]
                 model_response["choices"][0]["message"]["logprobs"] = sum_logprob
-            elif task == "other":
-                model_response["choices"][0]["message"]["content"] = str(completion_response[0]["generated_text"])
+            else:
+                model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
             ## CALCULATING USAGE
         prompt_tokens = len(
             encoding.encode(input_text)