diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6459cd2d8..9cef80ae0 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 90abdb2d2..0739d1e92 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -117,7 +117,6 @@ def completion(
         inference_params = copy.deepcopy(optional_params)
         inference_params.pop("details")
         inference_params.pop("return_full_text")
-        inference_params.pop("task")
         past_user_inputs = []
         generated_responses = []
         text = ""
@@ -181,7 +180,6 @@ def completion(
         inference_params = copy.deepcopy(optional_params)
         inference_params.pop("details")
         inference_params.pop("return_full_text")
-        inference_params.pop("task")
         data = {
             "inputs": prompt,
             "parameters": inference_params,
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index f016ae97d..1bf8eadf9 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -351,7 +351,7 @@ def test_completion_cohere_stream_bad_key():
 #     },
 # ]
 # response = completion(
-#     model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", messages=messages, api_base="https://n9ox93a8sv5ihsow.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000
+#     model="huggingface/mistralai/Mistral-7B-Instruct-v0.1", messages=messages, api_base="https://n9ox93a8sv5ihsow.us-east-1.aws.endpoints.huggingface.cloud", stream=True, max_tokens=1000, n=1
 # )
 # complete_response = ""
 # # Add any assertions here to check the response
diff --git a/litellm/utils.py b/litellm/utils.py
index b7995863d..122659829 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1039,7 +1039,7 @@ def get_optional_params(  # use the openai defaults
             optional_params["stop_sequences"] = stop
     elif custom_llm_provider == "huggingface":
         ## check if unsupported param passed in
-        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop",]
+        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
         _check_valid_arg(supported_params=supported_params)
 
         if temperature:
@@ -1055,6 +1055,8 @@ def get_optional_params(  # use the openai defaults
             optional_params["stop"] = stop
         if max_tokens:
             optional_params["max_new_tokens"] = max_tokens
+        if n:
+            optional_params["best_of"] = n
         if presence_penalty:
             optional_params["repetition_penalty"] = presence_penalty
     elif custom_llm_provider == "together_ai":
diff --git a/pyproject.toml b/pyproject.toml
index eedbb81a7..1bc0aa326 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.815"
+version = "0.1.816"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
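
Note on the utils.py change above: after this patch, the huggingface provider accepts the OpenAI-style n parameter and forwards it to the Hugging Face inference backend as best_of. Below is a minimal usage sketch, not part of this diff; the model name comes from the commented-out streaming test above, the prompt and max_tokens value are illustrative, and the private api_base from the test is omitted.

# Sketch only: shows n= being accepted for a huggingface/ model after this patch.
# Internally, get_optional_params maps n -> optional_params["best_of"] and
# max_tokens -> "max_new_tokens" for the huggingface provider.
import litellm

response = litellm.completion(
    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",  # model from the test file above
    messages=[{"role": "user", "content": "Say hello"}],     # illustrative prompt
    n=1,            # forwarded to the HF backend as "best_of"
    max_tokens=100, # forwarded as "max_new_tokens"
)
print(response)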