Use max_input_token for trim_messages

2025-04-26 19:24:27 +00:00 · 2024-04-16 13:36:25 +01:00 · 2024-04-16 13:36:25 +01:00 · ebc889d77a
commit ebc889d77a
parent 7ffd3d40fa
2 changed files with 24 additions and 5 deletions
--- a/litellm/tests/test_utils.py
+++ b/litellm/tests/test_utils.py
@@ -173,6 +173,22 @@ def test_trimming_should_not_change_original_messages():
    assert messages == messages_copy
# Checks that trim_messages, called with only a model name (no explicit
# max_tokens), returns messages whose token count is below that model's
# "max_input_tokens" entry in litellm.model_cost. Runs once per listed model.
@pytest.mark.parametrize("model", ["gpt-4-0125-preview", "claude-3-opus-20240229"])
 def test_trimming_with_model_cost_max_input_tokens(model):
    messages = [
        {"role": "system", "content": "This is a normal system message"},
        {
            "role": "user",
            # The sentence is repeated 100000 times to build a very long user
            # message (presumably well over either model's input limit -- the
            # test itself does not verify that precondition).
            "content": "This is a sentence" * 100000,
        },
    ]
    # max_tokens is deliberately omitted so the limit has to come from the model.
    trimmed_messages = trim_messages(messages, model=model)
    # Strict inequality: the trimmed token count must be under the input limit.
    assert (
        get_token_count(trimmed_messages, model=model)
        < litellm.model_cost[model]["max_input_tokens"]
    )
 def test_get_valid_models():
    old_environ = os.environ
    os.environ = {"OPENAI_API_KEY": "temp"}  # mock set only openai key in environ
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -10577,16 +10577,19 @@ def trim_messages(
    messages = copy.deepcopy(messages)
    try:
        print_verbose(f"trimming messages")
-        if max_tokens == None:
+        if max_tokens is None:
            # Check if model is valid
-            if model in litellm.model_cost:
+            if (
-                max_tokens_for_model = litellm.model_cost[model]["max_tokens"]
+                model in litellm.model_cost
                and "max_input_tokens" in litellm.model_cost[model]
            ):
                max_tokens_for_model = litellm.model_cost[model]["max_input_tokens"]
                max_tokens = int(max_tokens_for_model * trim_ratio)
            else:
-                # if user did not specify max tokens
+                # if user did not specify max input tokens
                # or passed an llm litellm does not know
                # do nothing, just return messages
-                return
+                return messages
        system_message = ""
        for message in messages: