diff --git a/litellm/utils.py b/litellm/utils.py
index 976c8e2e4a..92d6dc37db 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -474,6 +474,11 @@ def function_setup(  # noqa: PLR0915
                 if inspect.iscoroutinefunction(callback):
                     litellm._async_failure_callback.append(callback)
                     removed_async_items.append(index)
+                elif (
+                    callback in litellm._known_custom_logger_compatible_callbacks
+                    and isinstance(callback, str)
+                ):
+                    _add_custom_logger_callback_to_specific_event(callback, "failure")
 
             # Pop the async items from failure_callback in reverse order to avoid index issues
             for index in reversed(removed_async_items):
@@ -1385,30 +1390,33 @@ def _select_tokenizer(
 
 @lru_cache(maxsize=128)
 def _select_tokenizer_helper(model: str):
-    if model in litellm.cohere_models and "command-r" in model:
-        # cohere
-        cohere_tokenizer = Tokenizer.from_pretrained(
-            "Xenova/c4ai-command-r-v01-tokenizer"
-        )
-        return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
-    # anthropic
-    elif model in litellm.anthropic_models and "claude-3" not in model:
-        claude_tokenizer = Tokenizer.from_str(claude_json_str)
-        return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
-    # llama2
-    elif "llama-2" in model.lower() or "replicate" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-    # llama3
-    elif "llama-3" in model.lower():
-        tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
-        return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    try:
+        if model in litellm.cohere_models and "command-r" in model:
+            # cohere
+            cohere_tokenizer = Tokenizer.from_pretrained(
+                "Xenova/c4ai-command-r-v01-tokenizer"
+            )
+            return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
+        # anthropic
+        elif model in litellm.anthropic_models and "claude-3" not in model:
+            claude_tokenizer = Tokenizer.from_str(claude_json_str)
+            return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
+        # llama2
+        elif "llama-2" in model.lower() or "replicate" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+        # llama3
+        elif "llama-3" in model.lower():
+            tokenizer = Tokenizer.from_pretrained("Xenova/llama-3-tokenizer")
+            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+    except Exception as e:
+        verbose_logger.debug(f"Error selecting tokenizer: {e}")
+
     # default - tiktoken
-    else:
-        return {
-            "type": "openai_tokenizer",
-            "tokenizer": encoding,
-        }  # default to openai tokenizer
+    return {
+        "type": "openai_tokenizer",
+        "tokenizer": encoding,
+    }  # default to openai tokenizer
 
 
 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
diff --git a/tests/local_testing/test_token_counter.py b/tests/local_testing/test_token_counter.py
index ef9cc91945..e1e2c36e9f 100644
--- a/tests/local_testing/test_token_counter.py
+++ b/tests/local_testing/test_token_counter.py
@@ -382,3 +382,80 @@ def test_img_url_token_counter(img_url):
 
 def test_token_encode_disallowed_special():
     encode(model="gpt-3.5-turbo", text="Hello, world! <|endoftext|>")
+
+
+import unittest
+from unittest.mock import patch, MagicMock
+from litellm.utils import encoding, _select_tokenizer_helper, claude_json_str
+
+
+class TestTokenizerSelection(unittest.TestCase):
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_llama3_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Test with llama-3 model
+        result = _select_tokenizer_helper("llama-3-7b")
+
+        # Verify the attempt to load Llama-3 tokenizer
+        mock_from_pretrained.assert_called_once_with("Xenova/llama-3-tokenizer")
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_cohere_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Add Cohere model to the list for testing
+        litellm.cohere_models = ["command-r-v1"]
+
+        # Test with Cohere model
+        result = _select_tokenizer_helper("command-r-v1")
+
+        # Verify the attempt to load Cohere tokenizer
+        mock_from_pretrained.assert_called_once_with(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_str")
+    def test_claude_tokenizer_api_failure(self, mock_from_str):
+        # Setup mock to raise an error
+        mock_from_str.side_effect = Exception("Failed to load tokenizer")
+
+        # Add Claude model to the list for testing
+        litellm.anthropic_models = ["claude-2"]
+
+        # Test with Claude model
+        result = _select_tokenizer_helper("claude-2")
+
+        # Verify the attempt to load Claude tokenizer
+        mock_from_str.assert_called_once_with(claude_json_str)
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
+
+    @patch("litellm.utils.Tokenizer.from_pretrained")
+    def test_llama2_tokenizer_api_failure(self, mock_from_pretrained):
+        # Setup mock to raise an error
+        mock_from_pretrained.side_effect = Exception("Failed to load tokenizer")
+
+        # Test with Llama-2 model
+        result = _select_tokenizer_helper("llama-2-7b")
+
+        # Verify the attempt to load Llama-2 tokenizer
+        mock_from_pretrained.assert_called_once_with(
+            "hf-internal-testing/llama-tokenizer"
+        )
+
+        # Verify fallback to OpenAI tokenizer
+        self.assertEqual(result["type"], "openai_tokenizer")
+        self.assertEqual(result["tokenizer"], encoding)
diff --git a/tests/local_testing/test_utils.py b/tests/local_testing/test_utils.py
index c651d84fa3..866577c69a 100644
--- a/tests/local_testing/test_utils.py
+++ b/tests/local_testing/test_utils.py
@@ -1529,6 +1529,34 @@ def test_add_custom_logger_callback_to_specific_event_e2e(monkeypatch):
     assert len(litellm.failure_callback) == curr_len_failure_callback
 
 
+def test_add_custom_logger_callback_to_specific_event_e2e_failure(monkeypatch):
+    from litellm.integrations.openmeter import OpenMeterLogger
+
+    monkeypatch.setattr(litellm, "success_callback", [])
+    monkeypatch.setattr(litellm, "failure_callback", [])
+    monkeypatch.setattr(litellm, "callbacks", [])
+    monkeypatch.setenv("OPENMETER_API_KEY", "wedlwe")
+    monkeypatch.setenv("OPENMETER_API_URL", "https://openmeter.dev")
+
+    litellm.failure_callback = ["openmeter"]
+
+    curr_len_success_callback = len(litellm.success_callback)
+    curr_len_failure_callback = len(litellm.failure_callback)
+
+    litellm.completion(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response="Testing langfuse",
+    )
+
+    assert len(litellm.success_callback) == curr_len_success_callback
+    assert len(litellm.failure_callback) == curr_len_failure_callback
+
+    assert any(
+        isinstance(callback, OpenMeterLogger) for callback in litellm.failure_callback
+    )
+
+
 @pytest.mark.asyncio
 async def test_wrapper_kwargs_passthrough():
     from litellm.utils import client