diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py
index ac8ab11343..22a4af93c3 100644
--- a/litellm/tests/test_utils.py
+++ b/litellm/tests/test_utils.py
@@ -38,7 +38,7 @@ def test_multiple_messages_trimming():
         {"role": "user", "content": "This is a long message that will exceed the token limit."},
         {"role": "user", "content": "This is another long message that will also exceed the limit."}
     ]
-    trimmed_messages = trim_messages(messages_copy=messages, model="gpt-3.5-turbo", max_tokens=20)
+    trimmed_messages = trim_messages(messages=messages, model="gpt-3.5-turbo", max_tokens=20)
     # print(get_token_count(messages=trimmed_messages, model="gpt-3.5-turbo"))
     assert(get_token_count(messages=trimmed_messages, model="gpt-3.5-turbo")) <= 20
 # test_multiple_messages_trimming()
@@ -48,7 +48,7 @@ def test_multiple_messages_no_trimming():
         {"role": "user", "content": "This is a long message that will exceed the token limit."},
         {"role": "user", "content": "This is another long message that will also exceed the limit."}
     ]
-    trimmed_messages = trim_messages(messages_copy=messages, model="gpt-3.5-turbo", max_tokens=100)
+    trimmed_messages = trim_messages(messages=messages, model="gpt-3.5-turbo", max_tokens=100)
     print("Trimmed messages")
     print(trimmed_messages)
     assert(messages==trimmed_messages)
diff --git a/litellm/utils.py b/litellm/utils.py
index 11ab03472c..2b797ec44c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4477,7 +4477,7 @@ def completion_with_config(config: Union[dict, str], **kwargs):
         except:
             continue
     if prompt_larger_than_model:
-        messages = trim_messages(messages_copy=messages, model=max_model)
+        messages = trim_messages(messages=messages, model=max_model)
         kwargs["messages"] = messages

     kwargs["model"] = model
@@ -4666,7 +4666,7 @@ def shorten_message_to_fit_limit(
 # this code is borrowed from https://github.com/KillianLucas/tokentrim/blob/main/tokentrim/tokentrim.py
 # Credits for this code go to Killian Lucas
 def trim_messages(
-    messages_copy,
+    messages,
     model: Optional[str] = None,
     trim_ratio: float = 0.75,
     return_response_tokens: bool = False,
@@ -4687,7 +4687,7 @@ def trim_messages(
     """
     # Initialize max_tokens
     # if users pass in max tokens, trim to this amount
-    messages_copy = copy.deepcopy(messages_copy)
+    messages_copy = copy.deepcopy(messages)
     try:
         print_verbose(f"trimming messages")
         if max_tokens == None: