From 67dce555ece63a6572a658067d7f41ac22cfb87a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 6 Feb 2024 13:10:29 -0800
Subject: [PATCH] fix(utils.py): round max tokens to be int always

---
 litellm/tests/test_completion.py | 5 +++--
 litellm/utils.py                 | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index bd0301f20..de79c97af 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -544,13 +544,13 @@ def hf_test_completion_tgi():
 def test_completion_openai():
     try:
         litellm.set_verbose = True
+        litellm.drop_params = True
         print(f"api key: {os.environ['OPENAI_API_KEY']}")
         litellm.api_key = os.environ["OPENAI_API_KEY"]
         response = completion(
             model="gpt-3.5-turbo",
-            messages=messages,
+            messages=[{"role": "user", "content": "Hey"}],
             max_tokens=10,
-            request_timeout=1,
             metadata={"hi": "bye"},
         )
         print("This is the response object\n", response)
@@ -565,6 +565,7 @@ def test_completion_openai():
         assert len(response_str) > 1

         litellm.api_key = None
+        raise Exception("it works!")
     except Timeout as e:
         pass
     except Exception as e:
diff --git a/litellm/utils.py b/litellm/utils.py
index 31eeaacab..62315b3d9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2350,7 +2350,9 @@ def client(original_function):
                 elif user_max_tokens + input_tokens > max_output_tokens:
                     user_max_tokens = max_output_tokens - input_tokens
                 print_verbose(f"user_max_tokens: {user_max_tokens}")
-                kwargs["max_tokens"] = user_max_tokens
+                kwargs["max_tokens"] = int(
+                    round(user_max_tokens)
+                )  # make sure max tokens is always an int
         except Exception as e:
             print_verbose(f"Error while checking max token limit: {str(e)}")
         # MODEL CALL
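
For context, here is a minimal standalone sketch of the behavior the utils.py hunk fixes. The helper name trim_max_tokens and the sample token counts are illustrative assumptions, not litellm's actual API: the point is only that arithmetic on token counts can leave max_tokens as a float, which provider APIs reject, and int(round(...)) coerces it back before the model call.

# Hypothetical sketch, not litellm's code: trimming the requested completion
# size against a model's output window can produce a float max_tokens.
def trim_max_tokens(user_max_tokens, input_tokens, max_output_tokens):
    # Clamp the requested completion size to what the model can still emit.
    if user_max_tokens + input_tokens > max_output_tokens:
        user_max_tokens = max_output_tokens - input_tokens
    # Mirrors the patch: coerce to int so the request body is always valid.
    return int(round(user_max_tokens))

assert trim_max_tokens(10, 4000, 4096) == 10      # fits, passed through unchanged
assert trim_max_tokens(512.7, 4000, 4096) == 96   # clamped to 4096 - 4000, as an int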