fix(utils.py): handle failed hf tokenizer request during calls (#8032)

* fix(utils.py): handle failed hf tokenizer request during calls

prevents proxy from failing due to bad hf tokenizer calls

* fix(utils.py): convert failure callback str to custom logger class

Fixes https://github.com/BerriAI/litellm/issues/8013

* test(test_utils.py): fix test - avoid adding mlflow dep on ci/cd

* fix: add missing env vars to test

* test: cleanup redundant test
Author: Krish Dholakia (committed by GitHub)
Date: 2025-01-28 17:20:36 -08:00
Commit: c2e3986bbc (parent: 74e332bfdd)
3 changed files with 136 additions and 23 deletions
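
In essence, the tokenizer fix wraps the remote Hugging Face download in a try/except and falls back to the bundled tiktoken encoding, so a failed Hub request can no longer take down token counting. A minimal standalone sketch of the pattern (not litellm's actual code; assumes the `tokenizers` and `tiktoken` packages are installed):

    # Sketch of the fallback pattern this commit applies: try a remote
    # Hugging Face tokenizer, fall back to a local tiktoken encoding if
    # the request fails for any reason.
    import tiktoken
    from tokenizers import Tokenizer

    def count_tokens(text: str, hf_repo: str = "Xenova/llama-3-tokenizer") -> int:
        try:
            # Tokenizer.from_pretrained downloads from the HF Hub and can
            # raise on network errors, rate limits, or a missing repo.
            tokenizer = Tokenizer.from_pretrained(hf_repo)
            return len(tokenizer.encode(text).ids)
        except Exception:
            # Fall back to a bundled tiktoken encoding so the caller never fails.
            encoding = tiktoken.get_encoding("cl100k_base")
            return len(encoding.encode(text))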

@@ -474,6 +474,11 @@ def function_setup(  # noqa: PLR0915
                 if inspect.iscoroutinefunction(callback):
                     litellm._async_failure_callback.append(callback)
                     removed_async_items.append(index)
+                elif (
+                    callback in litellm._known_custom_logger_compatible_callbacks
+                    and isinstance(callback, str)
+                ):
+                    _add_custom_logger_callback_to_specific_event(callback, "failure")
             # Pop the async items from failure_callback in reverse order to avoid index issues
             for index in reversed(removed_async_items):
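
This first change addresses issue #8013: a failure callback given as a string (e.g. a known integration name) used to sit inert in `litellm.failure_callback`; it is now promoted to a custom-logger instance scoped to failure events. A simplified, standalone sketch of the dispatch logic above (function and argument names here are illustrative, not litellm's):

    import inspect
    from typing import Any, Callable, List

    def route_failure_callbacks(
        callbacks: List[Any],
        known_str_integrations: List[str],
        register_custom_logger: Callable[[str, str], None],
    ) -> None:
        # Mirrors the loop in function_setup: async callables are collected
        # for separate handling, and known integration names (strings) are
        # converted into real logger instances bound to the "failure" event.
        removed_async_items = []
        for index, callback in enumerate(callbacks):
            if inspect.iscoroutinefunction(callback):
                removed_async_items.append(index)
            elif isinstance(callback, str) and callback in known_str_integrations:
                # Before the fix, the bare string stayed in the list and was
                # never invoked when a call failed.
                register_custom_logger(callback, "failure")
        # Pop in reverse order so earlier indices stay valid.
        for index in reversed(removed_async_items):
            callbacks.pop(index)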
@@ -1385,30 +1390,33 @@ def _select_tokenizer(
 @lru_cache(maxsize=128)
 def _select_tokenizer_helper(model: str):
-    if model in litellm.cohere_models and "command-r" in model:
-        # cohere
-        cohere_tokenizer = Tokenizer.from_pretrained(
-            "Xenova/c4ai-command-r-v01-tokenizer"
-        )
-        return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
-    # anthropic
-    elif model in litellm.anthropic_models and "claude-3" not in model:
-        claude_tokenizer = Tokenizer.from_str(claude_json_str)
-        return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
-    # llama2
-    elif "llama-2" in model.lower() or "replicate" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-    # llama3
-    elif "llama-3" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    try:
+        if model in litellm.cohere_models and "command-r" in model:
+            # cohere
+            cohere_tokenizer = Tokenizer.from_pretrained(
+                "Xenova/c4ai-command-r-v01-tokenizer"
+            )
+            return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
+        # anthropic
+        elif model in litellm.anthropic_models and "claude-3" not in model:
+            claude_tokenizer = Tokenizer.from_str(claude_json_str)
+            return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
+        # llama2
+        elif "llama-2" in model.lower() or "replicate" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+        # llama3
+        elif "llama-3" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    except Exception as e:
+        verbose_logger.debug(f"Error selecting tokenizer: {e}")
+
     # default - tiktoken
-    else:
-        return {
-            "type": "openai_tokenizer",
-            "tokenizer": encoding,
-        }  # default to openai tokenizer
+    return {
+        "type": "openai_tokenizer",
+        "tokenizer": encoding,
+    }  # default to openai tokenizer


 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
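
With this second change, a Hugging Face Hub outage now degrades to the tiktoken default instead of raising into the proxy. A hypothetical pytest sketch of that behavior (the patch target, test name, and model name are assumptions, not the PR's actual test):

    from unittest.mock import patch

    from litellm.utils import _select_tokenizer_helper

    def test_tokenizer_falls_back_when_hf_is_unreachable():
        _select_tokenizer_helper.cache_clear()  # lru_cache would hide the failure
        # Replace the module-level Tokenizer name so from_pretrained raises,
        # simulating a failed request to the Hugging Face Hub.
        with patch("litellm.utils.Tokenizer") as mock_tokenizer:
            mock_tokenizer.from_pretrained.side_effect = ConnectionError("hub down")
            result = _select_tokenizer_helper(model="llama-3-8b-instruct")
        # The failed download is swallowed and we fall back to tiktoken.
        assert result["type"] == "openai_tokenizer"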