fix(utils.py): use llama tokenizer for replicate models

This commit is contained in:
Krrish Dholakia 2024-04-29 08:28:31 -07:00
parent dc5c175406
commit a18844b230

View file

@ -3668,7 +3668,7 @@ def _select_tokenizer(model: str):
 tokenizer = Tokenizer.from_str(json_str)
 return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
 # llama2
-elif "llama-2" in model.lower():
+elif "llama-2" in model.lower() or "replicate" in model.lower():
 tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
 return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
 # default - tiktoken