mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
(fix) utils.token trimming, hanging for large messages
This commit is contained in:
parent
24c1207451
commit
81ce24b96e
1 changed file with 5 additions and 3 deletions
|
@@ -4155,8 +4155,7 @@ def shorten_message_to_fit_limit(
|
||||||
|
|
||||||
ratio = (tokens_needed) / total_tokens
|
ratio = (tokens_needed) / total_tokens
|
||||||
|
|
||||||
new_length = int(len(content) * ratio)
|
new_length = int(len(content) * ratio) -1
|
||||||
print_verbose(new_length)
|
|
||||||
|
|
||||||
half_length = new_length // 2
|
half_length = new_length // 2
|
||||||
left_half = content[:half_length]
|
left_half = content[:half_length]
|
||||||
|
@@ -4195,6 +4194,7 @@ def trim_messages(
|
||||||
# Initialize max_tokens
|
# Initialize max_tokens
|
||||||
# if users pass in max tokens, trim to this amount
|
# if users pass in max tokens, trim to this amount
|
||||||
try:
|
try:
|
||||||
|
print_verbose(f"trimming messages")
|
||||||
if max_tokens == None:
|
if max_tokens == None:
|
||||||
# Check if model is valid
|
# Check if model is valid
|
||||||
if model in litellm.model_cost:
|
if model in litellm.model_cost:
|
||||||
|
@@ -4212,6 +4212,7 @@ def trim_messages(
|
||||||
system_message += message["content"]
|
system_message += message["content"]
|
||||||
|
|
||||||
current_tokens = token_counter(model=model, messages=messages)
|
current_tokens = token_counter(model=model, messages=messages)
|
||||||
|
print_verbose(f"Current tokens: {current_tokens}, max tokens: {max_tokens}")
|
||||||
|
|
||||||
# Do nothing if current tokens under messages
|
# Do nothing if current tokens under messages
|
||||||
if current_tokens < max_tokens:
|
if current_tokens < max_tokens:
|
||||||
|
@@ -4230,7 +4231,8 @@ def trim_messages(
|
||||||
return final_messages, response_tokens
|
return final_messages, response_tokens
|
||||||
|
|
||||||
return final_messages
|
return final_messages
|
||||||
except: # [NON-Blocking, if error occurs just return final_messages
|
except Exception as e: # [NON-Blocking, if error occurs just return final_messages
|
||||||
|
print("Got exception while token trimming", e)
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
# this helper reads the .env and returns a list of supported llms for user
|
# this helper reads the .env and returns a list of supported llms for user
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue