forked from phoenix/litellm-mirror
Fall back to max_tokens
This commit is contained in:
parent
ebc889d77a
commit
38c61a23b4
1 changed file with 3 additions and 6 deletions
|
@ -10579,14 +10579,11 @@ def trim_messages(
|
||||||
print_verbose(f"trimming messages")
|
print_verbose(f"trimming messages")
|
||||||
if max_tokens is None:
|
if max_tokens is None:
|
||||||
# Check if model is valid
|
# Check if model is valid
|
||||||
if (
|
if model in litellm.model_cost:
|
||||||
model in litellm.model_cost
|
max_tokens_for_model = litellm.model_cost[model].get("max_input_tokens", litellm.model_cost[model]["max_tokens"])
|
||||||
and "max_input_tokens" in litellm.model_cost[model]
|
|
||||||
):
|
|
||||||
max_tokens_for_model = litellm.model_cost[model]["max_input_tokens"]
|
|
||||||
max_tokens = int(max_tokens_for_model * trim_ratio)
|
max_tokens = int(max_tokens_for_model * trim_ratio)
|
||||||
else:
|
else:
|
||||||
# if user did not specify max input tokens
|
# if user did not specify max (input) tokens
|
||||||
# or passed an llm litellm does not know
|
# or passed an llm litellm does not know
|
||||||
# do nothing, just return messages
|
# do nothing, just return messages
|
||||||
return messages
|
return messages
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue