Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
fix(utils.py): handle failed hf tokenizer request during calls (#8032)
* fix(utils.py): handle failed hf tokenizer request during calls - prevents proxy from failing due to bad hf tokenizer calls
* fix(utils.py): convert failure callback str to custom logger class - Fixes https://github.com/BerriAI/litellm/issues/8013
* test(test_utils.py): fix test - avoid adding mlflow dep on ci/cd
* fix: add missing env vars to test
* test: cleanup redundant test
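In practice, the first fix means token-counting helpers keep working when the Hugging Face Hub is unreachable instead of raising through the proxy. A minimal sketch of the expected caller-level behavior (hypothetical usage built on the helpers this commit touches, mirroring the new unit tests further down):

from unittest.mock import patch

from litellm.utils import encode

# Simulate the Hugging Face Hub being unreachable: any tokenizer download
# raises, and litellm is expected to fall back to the default tiktoken encoding.
with patch(
    "litellm.utils.Tokenizer.from_pretrained",
    side_effect=Exception("hub unreachable"),
):
    tokens = encode(model="llama-3-8b", text="hello world")  # should not raise

print(len(tokens))  # token count produced by the tiktoken fallback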
parent 74e332bfdd
commit c2e3986bbc
3 changed files with 136 additions and 23 deletions
@@ -474,6 +474,11 @@ def function_setup(  # noqa: PLR0915
                 if inspect.iscoroutinefunction(callback):
                     litellm._async_failure_callback.append(callback)
                     removed_async_items.append(index)
+                elif (
+                    callback in litellm._known_custom_logger_compatible_callbacks
+                    and isinstance(callback, str)
+                ):
+                    _add_custom_logger_callback_to_specific_event(callback, "failure")
 
             # Pop the async items from failure_callback in reverse order to avoid index issues
             for index in reversed(removed_async_items):
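The practical effect of the new elif branch, exercised end to end by the test added at the bottom of this commit, is that a string entry in litellm.failure_callback naming a custom-logger-compatible integration is converted into the corresponding logger instance for failure events only. A rough sketch of that usage (hypothetical, assuming the OpenMeter env vars are set as in the test below):

import litellm

# "openmeter" is one of litellm's known custom-logger-compatible callbacks;
# registering it by name on failure_callback is now enough.
litellm.failure_callback = ["openmeter"]

litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello, world!"}],
    mock_response="test",  # mocked response, no real provider call is made
)

# After the first call the string entry has been converted into an
# OpenMeterLogger instance that is attached to failure events only.
print([type(cb).__name__ for cb in litellm.failure_callback])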
@@ -1385,30 +1390,33 @@ def _select_tokenizer(
 
 @lru_cache(maxsize=128)
 def _select_tokenizer_helper(model: str):
-    if model in litellm.cohere_models and "command-r" in model:
-        # cohere
-        cohere_tokenizer = Tokenizer.from_pretrained(
-            "Xenova/c4ai-command-r-v01-tokenizer"
-        )
-        return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
-    # anthropic
-    elif model in litellm.anthropic_models and "claude-3" not in model:
-        claude_tokenizer = Tokenizer.from_str(claude_json_str)
-        return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
-    # llama2
-    elif "llama-2" in model.lower() or "replicate" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-    # llama3
-    elif "llama-3" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-    # default - tiktoken
-    else:
-        return {
-            "type": "openai_tokenizer",
-            "tokenizer": encoding,
-        }  # default to openai tokenizer
+    try:
+        if model in litellm.cohere_models and "command-r" in model:
+            # cohere
+            cohere_tokenizer = Tokenizer.from_pretrained(
+                "Xenova/c4ai-command-r-v01-tokenizer"
+            )
+            return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
+        # anthropic
+        elif model in litellm.anthropic_models and "claude-3" not in model:
+            claude_tokenizer = Tokenizer.from_str(claude_json_str)
+            return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
+        # llama2
+        elif "llama-2" in model.lower() or "replicate" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+        # llama3
+        elif "llama-3" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    except Exception as e:
+        verbose_logger.debug(f"Error selecting tokenizer: {e}")
+
+    # default - tiktoken
+    return {
+        "type": "openai_tokenizer",
+        "tokenizer": encoding,
+    }  # default to openai tokenizer
 
 
 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
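One operational detail worth keeping in mind (an observation about the existing @lru_cache decorator, not something this commit changes): a fallback result produced while the Hugging Face Hub is down is cached per model string for the lifetime of the process. If a later retry of the download is wanted, the standard functools interface applies, e.g.:

from litellm.utils import _select_tokenizer_helper

# lru_cache-wrapped functions expose cache_clear(); calling it makes the next
# token count attempt the Hugging Face tokenizer download again.
_select_tokenizer_helper.cache_clear()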
@@ -382,3 +382,80 @@ def test_img_url_token_counter(img_url):
 
 def test_token_encode_disallowed_special():
     encode(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>")
+
+
+import unittest
+from unittest.mock import patch, MagicMock
+from litellm.utils import encoding, _select_tokenizer_helper, claude_json_str
+
+
+class TestTokenizerSelection(unittest.TestCase):
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_llama3_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Test with llama-3 model
+        result = _select_tokenizer_helper("llama-3-7b")
+
+        # Verify the attempt to load Llama-3 tokenizer
+        mock_from_pretrained.assert_called_once_with("Xenova/llama-3-tokenizer")
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_cohere_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Add Cohere model to the list for testing
+        litellm.cohere_models = ["command-r-v1"]
+
+        # Test with Cohere model
+        result = _select_tokenizer_helper("command-r-v1")
+
+        # Verify the attempt to load Cohere tokenizer
+        mock_from_pretrained.assert_called_once_with(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_str")
+    def test_claude_tokenizer_api_failure(self, mock_from_str):
+        # Setup mock to raise an error
+        mock_from_str.side_effect = Exception("Failed to load tokenizer")
+
+        # Add Claude model to the list for testing
+        litellm.anthropic_models = ["claude-2"]
+
+        # Test with Claude model
+        result = _select_tokenizer_helper("claude-2")
+
+        # Verify the attempt to load Claude tokenizer
+        mock_from_str.assert_called_once_with(claude_json_str)
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_llama2_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Test with Llama-2 model
+        result = _select_tokenizer_helper("llama-2-7b")
+
+        # Verify the attempt to load Llama-2 tokenizer
+        mock_from_pretrained.assert_called_once_with(
+            "hf-internal-testing/llama-tokenizer"
+        )
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
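Since the new tests are a unittest.TestCase class rather than plain pytest functions, pytest will still collect them with the rest of the suite, and they can also be driven directly from Python. A hypothetical standalone runner (the module name "test_token_counter" is an assumption; substitute the actual test file name):

import unittest

# Hypothetical: load and run only the new tokenizer-selection tests.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "test_token_counter.TestTokenizerSelection"
)
unittest.TextTestRunner(verbosity=2).run(suite)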
@@ -1529,6 +1529,34 @@ def test_add_custom_logger_callback_to_specific_event_e2e(monkeypatch):
     assert len(litellm.failure_callback) == curr_len_failure_callback
 
 
+def test_add_custom_logger_callback_to_specific_event_e2e_failure(monkeypatch):
+    from litellm.integrations.openmeter import OpenMeterLogger
+
+    monkeypatch.setattr(litellm, "success_callback", [])
+    monkeypatch.setattr(litellm, "failure_callback", [])
+    monkeypatch.setattr(litellm, "callbacks", [])
+    monkeypatch.setenv("OPENMETER_API_KEY", "wedlwe")
+    monkeypatch.setenv("OPENMETER_API_URL", "https://openmeter.dev")
+
+    litellm.failure_callback = ["openmeter"]
+
+    curr_len_success_callback = len(litellm.success_callback)
+    curr_len_failure_callback = len(litellm.failure_callback)
+
+    litellm.completion(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response="Testing langfuse",
+    )
+
+    assert len(litellm.success_callback) == curr_len_success_callback
+    assert len(litellm.failure_callback) == curr_len_failure_callback
+
+    assert any(
+        isinstance(callback, OpenMeterLogger) for callback in litellm.failure_callback
+    )
+
+
 @pytest.mark.asyncio
 async def test_wrapper_kwargs_passthrough():
     from litellm.utils import client