(fix) utils.token trimming, hanging for large messages

2025-04-25 18:54:30 +00:00 · 2023-10-24 16:02:39 -07:00 · 2023-10-24 16:02:39 -07:00 · 81ce24b96e
commit 81ce24b96e
parent 24c1207451
1 changed file with 5 additions and 3 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4155,8 +4155,7 @@ def shorten_message_to_fit_limit(
        ratio = (tokens_needed) / total_tokens
-        new_length = int(len(content) * ratio)
+        new_length = int(len(content) * ratio) -1
        print_verbose(new_length)
        half_length = new_length // 2
        left_half = content[:half_length]
@@ -4195,6 +4194,7 @@ def trim_messages(
    # Initialize max_tokens
    # if users pass in max tokens, trim to this amount
    try:
        print_verbose(f"trimming messages")
        if max_tokens == None:
            # Check if model is valid
            if model in litellm.model_cost:
@@ -4212,6 +4212,7 @@ def trim_messages(
                system_message += message["content"]
        current_tokens = token_counter(model=model, messages=messages)
        print_verbose(f"Current tokens: {current_tokens}, max tokens: {max_tokens}")
        # Do nothing if current tokens under messages
        if current_tokens < max_tokens:
@@ -4230,7 +4231,8 @@ def trim_messages(
            return final_messages, response_tokens
        return final_messages
-    except: # [NON-Blocking, if error occurs just return final_messages
+    except Exception as e: # [NON-Blocking, if error occurs just return final_messages
        print("Got exception while token trimming", e)
        return messages
 # this helper reads the .env and returns a list of supported llms for user