Litellm dev 12 12 2024 (#7203)

* fix(azure/): support passing headers to azure openai endpoints

Fixes https://github.com/BerriAI/litellm/issues/6217
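
A hedged example of the user-facing effect (model name and header value here are placeholders): extra headers passed to litellm.completion are now forwarded to the Azure OpenAI endpoint. `extra_headers` mirrors the OpenAI SDK kwarg.

    import litellm

    # placeholder deployment name + header; forwarded to the Azure OpenAI endpoint
    resp = litellm.completion(
        model="azure/my-gpt-4o-deployment",
        messages=[{"role": "user", "content": "hi"}],
        extra_headers={"X-Custom-Trace-Id": "abc-123"},
    )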

* fix(utils.py): move default tokenizer to just openai

the hf tokenizer makes a network call to fetch the tokenizer, which slows down execution
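
With tiktoken as the default, token counting for OpenAI-style (and unknown) models stays local. A quick sketch using litellm's public helper:

    import litellm

    # resolved locally via tiktoken - no Hugging Face Hub request
    n = litellm.token_counter(model="gpt-3.5-turbo", text="hello world")
    print(n)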

* fix(router.py): fix pattern matching router - add generic "*" to it as well

Fixes an issue where the generic "*" model access group wouldn't show up
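
A minimal sketch of the wildcard setup this fixes (config shape assumed from litellm's wildcard routing docs):

    from litellm import Router

    router = Router(
        model_list=[
            {
                # generic wildcard - matches any requested model name
                "model_name": "*",
                "litellm_params": {"model": "openai/*"},
            },
        ]
    )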

* fix(pattern_match_deployments.py): match to more specific pattern

Matching to the more specific pattern allows setting a generic wildcard model access group while excluding specific models more easily. See the sketch below.
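
Illustrative only, not the actual pattern_match_deployments.py code: when several wildcard patterns match a model name, prefer the one with the longest literal prefix, i.e. the more specific pattern.

    import re
    from typing import List, Optional

    def most_specific_match(model: str, patterns: List[str]) -> Optional[str]:
        # translate simple "*" wildcards to regex and keep only the patterns that match
        matches = [p for p in patterns if re.fullmatch(p.replace("*", ".*"), model)]
        # a longer literal portion (pattern minus wildcards) == more specific
        return max(matches, key=lambda p: len(p.replace("*", "")), default=None)

    assert most_specific_match("openai/gpt-4o", ["*", "openai/*"]) == "openai/*"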

* fix(proxy_server.py): fix _delete_deployment to handle base case where db_model list is empty

don't delete all router models because of an empty db_model list

Fixes https://github.com/BerriAI/litellm/issues/7196
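
A hedged sketch of the base-case guard (names are illustrative, not the exact proxy_server.py code):

    def _delete_deployment(db_models: list) -> int:
        # base case: an empty db_model list means "nothing to diff against",
        # not "delete everything" - bail out before touching router models
        if len(db_models) == 0:
            return 0
        ...  # existing diff-and-delete logic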

* fix(anthropic/): fix handling of response_format for anthropic messages with the anthropic api
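
Assumed usage (hedged): litellm translates response_format into Anthropic tool calling under the hood so the content comes back as JSON.

    import litellm

    resp = litellm.completion(
        model="anthropic/claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": "List two primes as JSON."}],
        response_format={"type": "json_object"},
    )
    print(resp.choices[0].message.content)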

* fix(fireworks_ai/): support passing response_format + tool call in same message

Addresses https://github.com/BerriAI/litellm/issues/7135

* Revert "fix(fireworks_ai/): support passing response_format + tool call in same message"

This reverts commit 6a30dc6929.

* test: fix test

* fix(replicate/): fix replicate default retry/polling logic
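
Illustrative polling loop (not replicate's or litellm's exact code): bound the number of status checks and sleep between retries instead of polling forever.

    import time

    def poll_prediction(get_status, max_retries: int = 60, delay: float = 0.5) -> str:
        # get_status is any callable returning the prediction's current state
        for _ in range(max_retries):
            status = get_status()
            if status in ("succeeded", "failed", "canceled"):
                return status
            time.sleep(delay)
        raise TimeoutError("prediction did not finish within the retry budget")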

* test: add unit testing for router pattern matching

* test: update test to use default oai tokenizer

* test: mark flaky test

* test: skip flaky test
Krish Dholakia 2024-12-13 08:54:03 -08:00 committed by GitHub
parent 15a0572a06
commit e68bb4e051
19 changed files with 496 additions and 103 deletions

litellm/utils.py

@@ -1214,7 +1214,9 @@ def client(original_function):  # noqa: PLR0915
 @lru_cache(maxsize=128)
-def _select_tokenizer(model: str):
+def _select_tokenizer(
+    model: str,
+):
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
         cohere_tokenizer = Tokenizer.from_pretrained(
@@ -1235,19 +1237,10 @@ def _select_tokenizer(model: str):
         return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
     # default - tiktoken
     else:
-        tokenizer = None
-        if (
-            model in litellm.open_ai_chat_completion_models
-            or model in litellm.open_ai_text_completion_models
-            or model in litellm.open_ai_embedding_models
-        ):
-            return {"type": "openai_tokenizer", "tokenizer": encoding}
-        try:
-            tokenizer = Tokenizer.from_pretrained(model)
-            return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
-        except Exception:
-            return {"type": "openai_tokenizer", "tokenizer": encoding}
+        return {
+            "type": "openai_tokenizer",
+            "tokenizer": encoding,
+        }  # default to openai tokenizer


 def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
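
For reference, a quick check of the new default path (_select_tokenizer is an internal helper in litellm.utils, so this is for illustration rather than a supported API):

    from litellm.utils import _select_tokenizer

    # an unrecognized model now falls through to the tiktoken default instead
    # of attempting Tokenizer.from_pretrained(model) over the network
    info = _select_tokenizer(model="some-unknown-model")
    assert info["type"] == "openai_tokenizer"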