Merge pull request #3062 from cwang/cwang/trim-messages-fix

Use `max_input_token` for `trim_messages`
Krish Dholakia 2024-04-16 22:29:45 -07:00 committed by GitHub
commit 8febe2f573
2 changed files with 20 additions and 4 deletions

@@ -10592,16 +10592,16 @@ def trim_messages(
     messages = copy.deepcopy(messages)
     try:
         print_verbose(f"trimming messages")
-        if max_tokens == None:
+        if max_tokens is None:
             # Check if model is valid
             if model in litellm.model_cost:
-                max_tokens_for_model = litellm.model_cost[model]["max_tokens"]
+                max_tokens_for_model = litellm.model_cost[model].get("max_input_tokens", litellm.model_cost[model]["max_tokens"])
                 max_tokens = int(max_tokens_for_model * trim_ratio)
             else:
-                # if user did not specify max tokens
+                # if user did not specify max (input) tokens
                 # or passed an llm litellm does not know
                 # do nothing, just return messages
-                return
+                return messages
         system_message = ""
         for message in messages:
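
For context, here is a minimal usage sketch (not part of the PR) of the code path this fixes, assuming litellm's public `trim_messages` helper and the `litellm.model_cost` map; the model name and message contents are illustrative:

```python
import litellm
from litellm.utils import trim_messages

model = "gpt-3.5-turbo"
entry = litellm.model_cost[model]

# Many model_cost entries carry both limits: "max_tokens" often mirrors the
# (smaller) output limit, while "max_input_tokens" is the context budget that
# trimming should actually target.
print(entry.get("max_tokens"), entry.get("max_input_tokens"))

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Please summarize: " + "lorem ipsum " * 2000},
]

# With this patch, the trim budget is max_input_tokens * trim_ratio when that
# key is present, falling back to max_tokens otherwise, so long conversations
# are trimmed to the real context window rather than the output limit.
trimmed = trim_messages(messages, model=model)
print(f"kept {len(trimmed)} messages")
```

The hunk also fixes a second bug visible above: for a model litellm does not recognize, `trim_messages` previously returned `None` instead of the untouched `messages` list.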