Fix team-based logging to langfuse + allow custom tokenizer on /token_counter endpoint (#7493)

* fix(langfuse_prompt_management.py): migrate dynamic logging to langfuse custom logger compatible class

* fix(langfuse_prompt_management.py): support failure callback logging to langfuse as well

* feat(proxy_server.py): support setting custom tokenizer on config.yaml

Allows customizing value for `/utils/token_counter`

* fix(proxy_server.py): fix linting errors

* test: skip if file not found

* style: cleanup unused import

* docs(configs.md): add docs on setting custom tokenizer
This commit is contained in:
Krish Dholakia 2024-12-31 23:18:41 -08:00 committed by GitHub
parent 6705e30d5d
commit 080de89cfb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 192 additions and 72 deletions

View file

@ -126,6 +126,7 @@ from litellm.types.utils import (
ChatCompletionMessageToolCall,
Choices,
CostPerToken,
CustomHuggingfaceTokenizer,
Delta,
Embedding,
EmbeddingResponse,
@ -1242,10 +1243,21 @@ def _is_async_request(
return False
# NOTE: no @lru_cache here — `custom_tokenizer` is a CustomHuggingfaceTokenizer
# (dict/TypedDict at runtime, per the string-key subscripting below), which is
# unhashable and would make the cache wrapper raise
# `TypeError: unhashable type: 'dict'` on every call that passes it.
# The model-only fast path is still cached inside `_select_tokenizer_helper`.
def _select_tokenizer(
    model: str, custom_tokenizer: Optional[CustomHuggingfaceTokenizer] = None
):
    """Select the tokenizer used for token counting.

    Args:
        model: Model name used to pick a default tokenizer.
        custom_tokenizer: Optional HuggingFace tokenizer spec with keys
            ``identifier``, ``revision`` and ``auth_token``. When provided it
            takes precedence over the model-based default.

    Returns:
        A dict of the form ``{"type": <tokenizer-kind>, "tokenizer": <obj>}``.
    """
    if custom_tokenizer is not None:
        # Build into a fresh local instead of rebinding the TypedDict
        # parameter, which would clobber its declared type mid-function.
        hf_tokenizer = Tokenizer.from_pretrained(
            custom_tokenizer["identifier"],
            revision=custom_tokenizer["revision"],
            auth_token=custom_tokenizer["auth_token"],
        )
        return {"type": "huggingface_tokenizer", "tokenizer": hf_tokenizer}
    # Default path: cached per-model selection (helper carries the lru_cache).
    return _select_tokenizer_helper(model=model)
@lru_cache(maxsize=128)
def _select_tokenizer_helper(model: str):
if model in litellm.cohere_models and "command-r" in model:
# cohere
cohere_tokenizer = Tokenizer.from_pretrained(