diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index ee29db147..8fc681ffa 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 63406a90e..a36ab177a 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index a5e50a654..a1d11a797 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -18,6 +18,13 @@ class AnthropicError(Exception):
             self.message
         ) # Call the base class constructor with the parameters it needs
 
+
+# contains any default values we need to pass to the provider
+AnthropicConfig = {
+    "max_tokens_to_sample": 256 # override by setting - completion(..,max_tokens=300)
+}
+
+
 # makes headers for API call
 def validate_environment(api_key):
     if api_key is None:
@@ -63,13 +70,16 @@
         else:
             prompt += f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
     prompt += f"{AnthropicConstants.AI_PROMPT.value}"
-    max_tokens_to_sample = optional_params.get("max_tokens_to_sample", 256) # required anthropic param, default to 256 if user does not provide an input
-    if max_tokens_to_sample != 256: # not default - print for testing
+
+    ## Load Config
+    for k, v in AnthropicConfig.items():
+        if k not in optional_params:
+            optional_params[k] = v
+    if optional_params["max_tokens_to_sample"] != 256: # not default - print for testing
         print_verbose(f"LiteLLM.Anthropic: Max Tokens Set")
     data = {
         "model": model,
         "prompt": prompt,
-        "max_tokens_to_sample": max_tokens_to_sample,
         **optional_params,
     }
 
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 6c0231312..90abdb2d2 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -19,7 +19,8 @@ class HuggingfaceError(Exception):
 
 # contains any default values we need to pass to the provider
 HuggingfaceConfig = {
-    "return_full_text": False # override by setting - completion(..,return_full_text=True)
+    "return_full_text": False, # override by setting - completion(..,return_full_text=True)
+    "details": True # needed for getting logprobs etc. for tgi models. override by setting - completion(..., details=False)
 }
 
 def validate_environment(api_key):
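Note on the pattern above: the new provider-level config dicts (AnthropicConfig and HuggingfaceConfig here, PetalsConfig below) hold provider defaults that are merged into optional_params only for keys the caller did not set, so an explicit completion(..., max_tokens=300) still wins. A minimal standalone sketch of that merge behavior follows; load_config is an illustrative helper name, not a litellm function.

    # Mirrors the "## Load Config" loop added in anthropic.py, independent of litellm's module layout.
    AnthropicConfig = {
        "max_tokens_to_sample": 256  # provider default, used only when the caller sets nothing
    }

    def load_config(optional_params, config):
        # caller-supplied values win; the config only fills in missing keys
        for k, v in config.items():
            if k not in optional_params:
                optional_params[k] = v
        return optional_params

    print(load_config({}, AnthropicConfig))                             # {'max_tokens_to_sample': 256}
    print(load_config({"max_tokens_to_sample": 300}, AnthropicConfig))  # {'max_tokens_to_sample': 300}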
diff --git a/litellm/llms/petals.py b/litellm/llms/petals.py
index 666602b2d..b11fbe06d 100644
--- a/litellm/llms/petals.py
+++ b/litellm/llms/petals.py
@@ -14,6 +14,10 @@ class PetalsError(Exception):
             self.message
         ) # Call the base class constructor with the parameters it needs
 
+PetalsConfig = {
+    "max_new_tokens": 256
+}
+
 def completion(
     model: str,
     messages: list,
@@ -54,6 +58,10 @@ def completion(
         else:
             prompt += f"{message['content']}"
 
+    ## Load Config
+    for k, v in PetalsConfig.items():
+        if k not in optional_params:
+            optional_params[k] = v
 
     ## LOGGING
     logging_obj.pre_call(
diff --git a/litellm/main.py b/litellm/main.py
index 0e595adf1..ee9c9a423 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -157,9 +157,9 @@ def completion(
     temperature: Optional[float] = None,
     top_p: Optional[float] = None,
     n: Optional[int] = None,
-    stream: bool = False,
+    stream: Optional[bool] = None,
     stop=None,
-    max_tokens: float = float("inf"),
+    max_tokens: Optional[float] = None,
     presence_penalty: Optional[float] = None,
     frequency_penalty: Optional[float]=None,
     logit_bias: dict = {},
@@ -218,7 +218,7 @@ def completion(
     ######## end of unpacking kwargs ###########
     args = locals()
     openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "metadata"]
-    litellm_params = ["return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "id", "metadata", "fallbacks"]
+    litellm_params = ["return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks"]
     default_params = openai_params + litellm_params
     non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider
     if mock_response:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 6377e8e8b..050152708 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -47,7 +47,7 @@ def test_completion_claude():
         print(response.response_ms)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-# test_completion_claude()
+test_completion_claude()
 
 def test_completion_claude_max_tokens():
     try:
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 23dcf57a8..465ee4a52 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -1,6 +1,7 @@
 from litellm import completion, stream_chunk_builder
 import litellm
-import os
+import os, dotenv
+dotenv.load_dotenv()
 
 user_message = "What is the current weather in Boston?"
 messages = [{"content": user_message, "role": "user"}]
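With completion() now defaulting stream and max_tokens to None (see the main.py hunk above), parameters the caller never sets stay falsy all the way down, and the provider config dicts supply any required defaults. A hedged usage sketch, assuming a valid ANTHROPIC_API_KEY in the environment and using "claude-instant-1" purely as an example model:

    import litellm

    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    # max_tokens left unset stays None, so AnthropicConfig's max_tokens_to_sample=256 is applied
    default_resp = litellm.completion(model="claude-instant-1", messages=messages)

    # an explicit max_tokens is mapped to max_tokens_to_sample=300, overriding the config default
    capped_resp = litellm.completion(model="claude-instant-1", messages=messages, max_tokens=300)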
diff --git a/litellm/utils.py b/litellm/utils.py
index 1398d056d..9db751674 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -977,6 +977,9 @@ def get_optional_params( # use the openai defaults
         raise ValueError("LiteLLM.Exception: Function calling is not supported by this provider")
 
     def _check_valid_arg(supported_params):
+        print(f"checking params for {model}")
+        print(f"params passed in {passed_params}")
+        print(f"non-default params passed in {non_default_params}")
         unsupported_params = [k for k in non_default_params.keys() if k not in supported_params]
         if unsupported_params:
             raise ValueError("LiteLLM.Exception: Unsupported parameters passed: {}".format(', '.join(unsupported_params)))
@@ -990,15 +993,14 @@
         # handle anthropic params
         if stream:
             optional_params["stream"] = stream
-        if stop != None:
+        if stop:
             optional_params["stop_sequences"] = stop
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_tokens_to_sample"] = max_tokens
-        return optional_params
     elif custom_llm_provider == "cohere":
         ## check if unsupported param passed in
         supported_params = ["stream", "temperature", "max_tokens", "logit_bias"]
@@ -1006,13 +1008,12 @@
         # handle cohere params
         if stream:
             optional_params["stream"] = stream
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_tokens"] = max_tokens
         if logit_bias != {}:
             optional_params["logit_bias"] = logit_bias
-        return optional_params
     elif custom_llm_provider == "replicate":
         ## check if unsupported param passed in
         supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop"]
@@ -1021,39 +1022,37 @@
         if stream:
             optional_params["stream"] = stream
             return optional_params
-        if max_tokens != float("inf"):
+        if max_tokens:
             if "vicuna" in model or "flan" in model:
                 optional_params["max_length"] = max_tokens
             else:
                 optional_params["max_new_tokens"] = max_tokens
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if stop != None:
+        if stop:
             optional_params["stop_sequences"] = stop
     elif custom_llm_provider == "huggingface":
         ## check if unsupported param passed in
-        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop", "return_full_text", "details"]
+        supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop",]
         _check_valid_arg(supported_params=supported_params)
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if n != 1:
+        if n:
             optional_params["best_of"] = n
             optional_params["do_sample"] = True # need to sample if you want best of for hf inference endpoints
         if stream:
             optional_params["stream"] = stream
-        if stop != None:
+        if stop:
             optional_params["stop"] = stop
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_new_tokens"] = max_tokens
-        if presence_penalty != 0:
+        if presence_penalty:
             optional_params["repetition_penalty"] = presence_penalty
-        optional_params["return_full_text"] = return_full_text
-        optional_params["details"] = True
     elif custom_llm_provider == "together_ai":
         ## check if unsupported param passed in
         supported_params = ["stream", "temperature", "max_tokens", "top_p", "stop", "frequency_penalty"]
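The recurring edit in these hunks swaps sentinel comparisons such as temperature != 1 and max_tokens != float("inf") for plain truthiness checks, which is safe now that completion() defaults those arguments to None. Below is a self-contained sketch of the resulting mapping for the anthropic branch; map_anthropic_params is an illustrative stand-in, not litellm's actual function.

    from typing import Optional

    def map_anthropic_params(temperature: Optional[float] = None,
                             top_p: Optional[float] = None,
                             max_tokens: Optional[int] = None,
                             stop=None,
                             stream: Optional[bool] = None) -> dict:
        # only explicitly passed (truthy) values are forwarded to the provider
        optional_params = {}
        if stream:
            optional_params["stream"] = stream
        if stop:
            optional_params["stop_sequences"] = stop
        if temperature:
            optional_params["temperature"] = temperature
        if top_p:
            optional_params["top_p"] = top_p
        if max_tokens:
            optional_params["max_tokens_to_sample"] = max_tokens
        return optional_params

    print(map_anthropic_params())                # {}  (AnthropicConfig later fills in the 256 default)
    print(map_anthropic_params(max_tokens=300))  # {'max_tokens_to_sample': 300}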
@@ -1061,24 +1060,24 @@
         if stream:
             optional_params["stream_tokens"] = stream
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_tokens"] = max_tokens
-        if frequency_penalty != 0:
+        if frequency_penalty:
             optional_params["frequency_penalty"] = frequency_penalty # TODO: Check if should be repetition penalty
-        if stop != None:
+        if stop:
             optional_params["stop"] = stop #TG AI expects a list, example ["\n\n\n\n","<|endoftext|>"]
     elif custom_llm_provider == "palm":
         ## check if unsupported param passed in
         supported_params = ["temperature", "top_p"]
         _check_valid_arg(supported_params=supported_params)
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
     elif (
         custom_llm_provider == "vertex_ai"
@@ -1087,13 +1086,13 @@
         supported_params = ["temperature", "top_p", "max_tokens", "stream"]
         _check_valid_arg(supported_params=supported_params)
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
         if stream:
             optional_params["stream"] = stream
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_output_tokens"] = max_tokens
     elif custom_llm_provider == "sagemaker":
         if "llama-2" in model:
@@ -1108,11 +1107,11 @@
             supported_params = ["temperature", "max_tokens"]
             _check_valid_arg(supported_params=supported_params)
-            if max_tokens != float("inf"):
+            if max_tokens:
                 optional_params["max_new_tokens"] = max_tokens
-            if temperature != 1:
+            if temperature:
                 optional_params["temperature"] = temperature
-            if top_p != 1:
+            if top_p:
                 optional_params["top_p"] = top_p
         else:
             ## check if unsupported param passed in
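Each provider branch above first calls _check_valid_arg, which rejects any caller-supplied parameter that is not in that provider's supported_params list. A rough standalone sketch of that guard, simplified from the nested helper in get_optional_params (check_valid_arg is just an illustrative name):

    def check_valid_arg(non_default_params: dict, supported_params: list) -> None:
        # mirror of the guard in utils.py: unknown params raise instead of being silently dropped
        unsupported_params = [k for k in non_default_params if k not in supported_params]
        if unsupported_params:
            raise ValueError(
                "LiteLLM.Exception: Unsupported parameters passed: {}".format(", ".join(unsupported_params))
            )

    check_valid_arg({"temperature": 0.7}, ["temperature", "top_p"])  # passes silently
    # check_valid_arg({"logit_bias": {50256: -100}}, ["temperature", "top_p"])  # would raise ValueError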
@@ -1124,92 +1123,90 @@
             _check_valid_arg(supported_params=supported_params)
             # params "maxTokens":200,"temperature":0,"topP":250,"stop_sequences":[],
             # https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
-            if max_tokens != float("inf"):
+            if max_tokens:
                 optional_params["maxTokens"] = max_tokens
-            if temperature != 1:
+            if temperature:
                 optional_params["temperature"] = temperature
-            if stop != None:
+            if stop:
                 optional_params["stop_sequences"] = stop
-            if top_p != 1:
+            if top_p:
                 optional_params["topP"] = top_p
         elif "anthropic" in model:
             supported_params = ["max_tokens", "temperature", "stop", "top_p"]
             _check_valid_arg(supported_params=supported_params)
             # anthropic params on bedrock
             # \"max_tokens_to_sample\":300,\"temperature\":0.5,\"top_p\":1,\"stop_sequences\":[\"\\\\n\\\\nHuman:\"]}"
-            if max_tokens != float("inf"):
+            if max_tokens:
                 optional_params["max_tokens_to_sample"] = max_tokens
             else:
                 optional_params["max_tokens_to_sample"] = 256 # anthropic fails without max_tokens_to_sample
-            if temperature != 1:
+            if temperature:
                 optional_params["temperature"] = temperature
-            if top_p != 1:
+            if top_p:
                 optional_params["top_p"] = top_p
-            if stop != None:
+            if stop:
                 optional_params["stop_sequences"] = stop
         elif "amazon" in model: # amazon titan llms
             supported_params = ["max_tokens", "temperature", "stop", "top_p"]
             _check_valid_arg(supported_params=supported_params)
             # see https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-large
-            if max_tokens != float("inf"):
+            if max_tokens:
                 optional_params["maxTokenCount"] = max_tokens
-            if temperature != 1:
+            if temperature:
                 optional_params["temperature"] = temperature
-            if stop != None:
+            if stop:
                 optional_params["stopSequences"] = stop
-            if top_p != 1:
+            if top_p:
                 optional_params["topP"] = top_p
     elif model in litellm.aleph_alpha_models:
         supported_params = ["max_tokens", "stream", "top_p", "temperature", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["maximum_tokens"] = max_tokens
         if stream:
             optional_params["stream"] = stream
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if presence_penalty != 0:
+        if presence_penalty:
             optional_params["presence_penalty"] = presence_penalty
-        if frequency_penalty != 0:
+        if frequency_penalty:
             optional_params["frequency_penalty"] = frequency_penalty
-        if n != 1:
+        if n:
             optional_params["n"] = n
-        if stop != None:
+        if stop:
             optional_params["stop_sequences"] = stop
     elif model in litellm.nlp_cloud_models or custom_llm_provider == "nlp_cloud":
         supported_params = ["max_tokens", "stream", "temperature", "top_p", "presence_penalty", "frequency_penalty", "n", "stop"]
         _check_valid_arg(supported_params=supported_params)
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_length"] = max_tokens
         if stream:
             optional_params["stream"] = stream
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
-        if presence_penalty != 0:
+        if presence_penalty:
             optional_params["presence_penalty"] = presence_penalty
-        if frequency_penalty != 0:
+        if frequency_penalty:
             optional_params["frequency_penalty"] = frequency_penalty
-        if n != 1:
+        if n:
             optional_params["num_return_sequences"] = n
-        if stop != None:
+        if stop:
             optional_params["stop_sequences"] = stop
     elif model in litellm.petals_models or custom_llm_provider == "petals":
         supported_params = ["max_tokens", "temperature", "top_p"]
         _check_valid_arg(supported_params=supported_params)
         # max_new_tokens=1,temperature=0.9, top_p=0.6
-        if max_tokens != float("inf"):
+        if max_tokens:
             optional_params["max_new_tokens"] = max_tokens
-        else:
-            optional_params["max_new_tokens"] = 256 # petals always needs max_new_tokens
-        if temperature != 1:
+        if temperature:
             optional_params["temperature"] = temperature
-        if top_p != 1:
+        if top_p:
             optional_params["top_p"] = top_p
     else: # assume passing in params for openai/azure openai
         supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "logit_bias", "user", "deployment_id"]
@@ -1219,6 +1216,7 @@
     for k in passed_params.keys():
         if k not in default_params.keys():
             optional_params[k] = passed_params[k]
+    print(f"final params going to model: {optional_params}")
     return optional_params
 
 def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
diff --git a/pyproject.toml b/pyproject.toml
index 129acb1c2..cc845f058 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.809"
+version = "0.1.810"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"