Fix team-based logging to langfuse + allow custom tokenizer on /token_counter endpoint (#7493)

* fix(langfuse_prompt_management.py): migrate dynamic logging to a langfuse custom-logger-compatible class

* fix(langfuse_prompt_management.py): support failure callback logging to langfuse as well (see the logging sketch after these commit notes)

* feat(proxy_server.py): support setting custom tokenizer on config.yaml

Allows customizing the tokenizer used by `/utils/token_counter` (a usage sketch follows the diff below)

* fix(proxy_server.py): fix linting errors

* test: skip if file not found

* style: cleanup unused import

* docs(configs.md): add docs on setting custom tokenizer
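
Before the diff, a minimal sketch of how the Langfuse logging touched by this commit is typically exercised from the SDK. The module-level `success_callback`/`failure_callback` lists are standard litellm settings; the per-request `langfuse_public_key`/`langfuse_secret_key` parameters are an assumption about how team-scoped (dynamic) credentials are passed, not something defined by this commit.

```python
# Sketch only: the per-request Langfuse credentials below are an assumed way to
# route one team's traffic to its own Langfuse project; adjust to your setup.
import litellm

# Log both successful and failed calls to Langfuse (reads LANGFUSE_PUBLIC_KEY /
# LANGFUSE_SECRET_KEY from the environment by default).
litellm.success_callback = ["langfuse"]
litellm.failure_callback = ["langfuse"]

response = litellm.completion(
    model="gpt-4o-mini",  # placeholder model
    messages=[{"role": "user", "content": "Hello!"}],
    # Assumed dynamic params: override the Langfuse project for this request/team.
    langfuse_public_key="pk-lf-team-a",
    langfuse_secret_key="sk-lf-team-a",
)
```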
Krish Dholakia 2024-12-31 23:18:41 -08:00 committed by GitHub
parent 6705e30d5d
commit 080de89cfb
11 changed files with 192 additions and 72 deletions


@@ -274,6 +274,8 @@ from litellm.types.llms.anthropic import (
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import ModelInfo as RouterModelInfo
from litellm.types.router import RouterGeneralSettings, updateDeployment
from litellm.types.utils import CustomHuggingfaceTokenizer
from litellm.types.utils import ModelInfo as ModelMapInfo
from litellm.types.utils import StandardLoggingPayload
from litellm.utils import get_end_user_id_for_cost_tracking
@@ -5526,11 +5528,16 @@ async def token_counter(request: TokenCountRequest):
    deployment = None
    litellm_model_name = None
    model_info: Optional[ModelMapInfo] = None
    if llm_router is not None:
        # get 1 deployment corresponding to the model
        for _model in llm_router.model_list:
            if _model["model_name"] == request.model:
                deployment = _model
                model_info = llm_router.get_router_model_info(
                    deployment=deployment,
                    received_model_name=request.model,
                )
                break
    if deployment is not None:
        litellm_model_name = deployment.get("litellm_params", {}).get("model")
@@ -5541,12 +5548,22 @@ async def token_counter(request: TokenCountRequest):
    model_to_use = (
        litellm_model_name or request.model
    )  # use litellm model name; if it's not available, fall back to request.model
    _tokenizer_used = litellm.utils._select_tokenizer(model=model_to_use)
    custom_tokenizer: Optional[CustomHuggingfaceTokenizer] = None
    if model_info is not None:
        custom_tokenizer = cast(
            Optional[CustomHuggingfaceTokenizer],
            model_info.get("custom_tokenizer", None),
        )
    _tokenizer_used = litellm.utils._select_tokenizer(
        model=model_to_use, custom_tokenizer=custom_tokenizer
    )
    tokenizer_used = str(_tokenizer_used["type"])
    total_tokens = token_counter(
        model=model_to_use,
        text=prompt,
        messages=messages,
        custom_tokenizer=_tokenizer_used,
    )
    return TokenCountResponse(
        total_tokens=total_tokens,
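
With the hunks above, a deployment's `model_info` can carry a `custom_tokenizer` that `/utils/token_counter` forwards to `_select_tokenizer`. Below is a minimal sketch of exercising the endpoint against a locally running proxy; the base URL, API key, model name, and the `custom_tokenizer` field names in the commented config are placeholders and assumptions, not values taken from this commit.

```python
# Sketch: count tokens via the proxy for a model whose config.yaml entry sets
# model_info.custom_tokenizer. The commented config shape is an assumption:
#
#   model_list:
#     - model_name: my-custom-model
#       litellm_params:
#         model: huggingface/my-org/my-custom-model
#       model_info:
#         custom_tokenizer:
#           identifier: my-org/my-custom-model   # assumed field: HF repo id
#
import requests

resp = requests.post(
    "http://localhost:4000/utils/token_counter",  # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},  # placeholder proxy key
    json={
        "model": "my-custom-model",
        "messages": [{"role": "user", "content": "How many tokens is this?"}],
    },
)
print(resp.json())  # expected to include total_tokens and the tokenizer type used
```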