From 38c61a23b4d024dd46dab69905f1a885fd427112 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Tue, 16 Apr 2024 19:00:09 +0100 Subject: [PATCH] Fall back to `max_tokens` --- litellm/utils.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index c497c6326..3260b1e15 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -10579,14 +10579,11 @@ def trim_messages( print_verbose(f"trimming messages") if max_tokens is None: # Check if model is valid - if ( - model in litellm.model_cost - and "max_input_tokens" in litellm.model_cost[model] - ): - max_tokens_for_model = litellm.model_cost[model]["max_input_tokens"] + if model in litellm.model_cost: + max_tokens_for_model = litellm.model_cost[model].get("max_input_tokens", litellm.model_cost[model]["max_tokens"]) max_tokens = int(max_tokens_for_model * trim_ratio) else: - # if user did not specify max input tokens + # if user did not specify max (input) tokens # or passed an llm litellm does not know # do nothing, just return messages return messages