test - token count response

2024-05-16 13:20:01 -07:00 · 2024-05-16 13:20:01 -07:00 · 4a5e6aa43c
commit 4a5e6aa43c
parent 22ba5fa186
3 changed files with 157 additions and 14 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -4779,33 +4779,38 @@ async def token_counter(request: TokenCountRequest):

    prompt = request.prompt
    messages = request.messages
+    if prompt is None and messages is None:
+        raise HTTPException(
+            status_code=400, detail="prompt or messages must be provided"
+        )

+    deployment = None
+    litellm_model_name = None
    if llm_router is not None:
        # get 1 deployment corresponding to the model
        for _model in llm_router.model_list:
            if _model["model_name"] == request.model:
                deployment = _model
                break
+    if deployment is not None:
+        litellm_model_name = deployment.get("litellm_params", {}).get("model")
+        # remove the custom_llm_provider_prefix in the litellm_model_name
+        if "/" in litellm_model_name:
+            litellm_model_name = litellm_model_name.split("/", 1)[1]

-    litellm_model_name = deployment.get("litellm_params", {}).get("model")
-    # remove the custom_llm_provider_prefix in the litellm_model_name
-    if "/" in litellm_model_name:
-        litellm_model_name = litellm_model_name.split("/", 1)[1]
-
-    if prompt is None and messages is None:
-        raise HTTPException(
-            status_code=400, detail="prompt or messages must be provided"
-        )
+    model_to_use = (
+        litellm_model_name or request.model
+    )  # use the litellm model name; if it's not available, fall back to request.model
    total_tokens, tokenizer_used = token_counter(
-        model=litellm_model_name,
+        model=model_to_use,
        text=prompt,
        messages=messages,
        return_tokenizer_used=True,
    )
    return TokenCountResponse(
        total_tokens=total_tokens,
-        model=request.model,
-        base_model=litellm_model_name,
+        request_model=request.model,
+        model_used=model_to_use,
        tokenizer_type=tokenizer_used,
    )