mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
fix(utils.py): round max tokens to be int always
This commit is contained in:
parent
647dbb9331
commit
eee5353e77
2 changed files with 6 additions and 3 deletions
|
@@ -544,13 +544,13 @@ def hf_test_completion_tgi():
|
||||||
def test_completion_openai():
|
def test_completion_openai():
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
litellm.drop_params = True
|
||||||
print(f"api key: {os.environ['OPENAI_API_KEY']}")
|
print(f"api key: {os.environ['OPENAI_API_KEY']}")
|
||||||
litellm.api_key = os.environ["OPENAI_API_KEY"]
|
litellm.api_key = os.environ["OPENAI_API_KEY"]
|
||||||
response = completion(
|
response = completion(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
messages=messages,
|
messages=[{"role": "user", "content": "Hey"}],
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
request_timeout=1,
|
|
||||||
metadata={"hi": "bye"},
|
metadata={"hi": "bye"},
|
||||||
)
|
)
|
||||||
print("This is the response object\n", response)
|
print("This is the response object\n", response)
|
||||||
|
@@ -565,6 +565,7 @@ def test_completion_openai():
|
||||||
assert len(response_str) > 1
|
assert len(response_str) > 1
|
||||||
|
|
||||||
litellm.api_key = None
|
litellm.api_key = None
|
||||||
|
raise Exception("it works!")
|
||||||
except Timeout as e:
|
except Timeout as e:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
@@ -2348,7 +2348,9 @@ def client(original_function):
|
||||||
elif user_max_tokens + input_tokens > max_output_tokens:
|
elif user_max_tokens + input_tokens > max_output_tokens:
|
||||||
user_max_tokens = max_output_tokens - input_tokens
|
user_max_tokens = max_output_tokens - input_tokens
|
||||||
print_verbose(f"user_max_tokens: {user_max_tokens}")
|
print_verbose(f"user_max_tokens: {user_max_tokens}")
|
||||||
kwargs["max_tokens"] = user_max_tokens
|
kwargs["max_tokens"] = int(
|
||||||
|
round(user_max_tokens)
|
||||||
|
) # make sure max tokens is always an int
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_verbose(f"Error while checking max token limit: {str(e)}")
|
print_verbose(f"Error while checking max token limit: {str(e)}")
|
||||||
# MODEL CALL
|
# MODEL CALL
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue