mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
(fix) utils.token trimming, hanging for large messages
This commit is contained in:
parent
24c1207451
commit
81ce24b96e
1 changed file with 5 additions and 3 deletions
|
@@ -4155,8 +4155,7 @@ def shorten_message_to_fit_limit(
|
|||
|
||||
ratio = (tokens_needed) / total_tokens
|
||||
|
||||
new_length = int(len(content) * ratio)
|
||||
print_verbose(new_length)
|
||||
new_length = int(len(content) * ratio) -1
|
||||
|
||||
half_length = new_length // 2
|
||||
left_half = content[:half_length]
|
||||
|
@@ -4195,6 +4194,7 @@ def trim_messages(
|
|||
# Initialize max_tokens
|
||||
# if users pass in max tokens, trim to this amount
|
||||
try:
|
||||
print_verbose(f"trimming messages")
|
||||
if max_tokens == None:
|
||||
# Check if model is valid
|
||||
if model in litellm.model_cost:
|
||||
|
@@ -4212,6 +4212,7 @@ def trim_messages(
|
|||
system_message += message["content"]
|
||||
|
||||
current_tokens = token_counter(model=model, messages=messages)
|
||||
print_verbose(f"Current tokens: {current_tokens}, max tokens: {max_tokens}")
|
||||
|
||||
# Do nothing if current tokens under messages
|
||||
if current_tokens < max_tokens:
|
||||
|
@@ -4230,7 +4231,8 @@ def trim_messages(
|
|||
return final_messages, response_tokens
|
||||
|
||||
return final_messages
|
||||
except: # [NON-Blocking, if error occurs just return final_messages
|
||||
except Exception as e: # [NON-Blocking, if error occurs just return final_messages
|
||||
print("Got exception while token trimming", e)
|
||||
return messages
|
||||
|
||||
# this helper reads the .env and returns a list of supported llms for user
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue