test - token count response

This commit is contained in:
Ishaan Jaff 2024-05-16 13:20:01 -07:00
parent 22ba5fa186
commit 4a5e6aa43c
3 changed files with 157 additions and 14 deletions

View file

@ -4779,33 +4779,38 @@ async def token_counter(request: TokenCountRequest):
prompt = request.prompt
messages = request.messages
if prompt is None and messages is None:
raise HTTPException(
status_code=400, detail="prompt or messages must be provided"
)
deployment = None
litellm_model_name = None
if llm_router is not None:
# get 1 deployment corresponding to the model
for _model in llm_router.model_list:
if _model["model_name"] == request.model:
deployment = _model
break
if deployment is not None:
litellm_model_name = deployment.get("litellm_params", {}).get("model")
# remove the custom_llm_provider_prefix in the litellm_model_name
if "/" in litellm_model_name:
litellm_model_name = litellm_model_name.split("/", 1)[1]
litellm_model_name = deployment.get("litellm_params", {}).get("model")
# remove the custom_llm_provider_prefix in the litellm_model_name
if "/" in litellm_model_name:
litellm_model_name = litellm_model_name.split("/", 1)[1]
if prompt is None and messages is None:
raise HTTPException(
status_code=400, detail="prompt or messages must be provided"
)
model_to_use = (
litellm_model_name or request.model
) # use litellm model name, if it's not available then fall back to request.model
total_tokens, tokenizer_used = token_counter(
model=litellm_model_name,
model=model_to_use,
text=prompt,
messages=messages,
return_tokenizer_used=True,
)
return TokenCountResponse(
total_tokens=total_tokens,
model=request.model,
base_model=litellm_model_name,
request_model=request.model,
model_used=model_to_use,
tokenizer_type=tokenizer_used,
)