fix(utils.py): add lru-cache logic to _select_tokenizer

speed up tokenizer load times
This commit is contained in:
Krrish Dholakia 2024-05-14 16:38:45 -07:00
parent 811406a2c6
commit 2b41f09268

View file

@@ -12,10 +12,9 @@ import dotenv, json, traceback, threading, base64, ast
import subprocess, os
from os.path import abspath, join, dirname
import litellm, openai
import itertools
import random, uuid, requests # type: ignore
from functools import wraps
from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
@@ -3881,6 +3880,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
return a100_80gb_price_per_second_public * total_time / 1000
@lru_cache(maxsize=128)
def _select_tokenizer(model: str):
from importlib import resources