feat: try using the Hugging Face (HF) tokenizer for token counting

This commit is contained in:
Ishaan Jaff 2024-05-16 10:59:29 -07:00
parent c646b809a6
commit 22ba5fa186
3 changed files with 34 additions and 4 deletions

View file

@@ -4775,21 +4775,38 @@ async def token_counter(request: TokenCountRequest):
""" """
from litellm import token_counter
global llm_router
prompt = request.prompt
messages = request.messages
if llm_router is not None:
# get 1 deployment corresponding to the model
for _model in llm_router.model_list:
if _model["model_name"] == request.model:
deployment = _model
break
litellm_model_name = deployment.get("litellm_params", {}).get("model")
# remove the custom_llm_provider_prefix in the litellm_model_name
if "/" in litellm_model_name:
litellm_model_name = litellm_model_name.split("/", 1)[1]
if prompt is None and messages is None:
raise HTTPException(
status_code=400, detail="prompt or messages must be provided"
)
total_tokens = token_counter(
model=request.model,
total_tokens, tokenizer_used = token_counter(
model=litellm_model_name,
text=prompt,
messages=messages,
return_tokenizer_used=True,
)
return TokenCountResponse(
total_tokens=total_tokens,
model=request.model,
base_model=litellm_model_name,
tokenizer_type=tokenizer_used,
)