fix(utils.py): add lru-cache logic to _select_tokenizer

speed up tokenizer load times
This commit is contained in:
Krrish Dholakia 2024-05-14 16:38:45 -07:00
parent 811406a2c6
commit 2b41f09268

View file

@ -12,10 +12,9 @@ import dotenv, json, traceback, threading, base64, ast
 import subprocess, os
 from os.path import abspath, join, dirname
 import litellm, openai
 import itertools
 import random, uuid, requests  # type: ignore
-from functools import wraps
+from functools import wraps, lru_cache
 import datetime, time
 import tiktoken
 import uuid
@ -3881,6 +3880,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
     return a100_80gb_price_per_second_public * total_time / 1000
 
+@lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
     from importlib import resources