forked from phoenix/litellm-mirror

test(test_keys.py): use correct model name for token counting

parent 4ca4913468
commit d6844f43c8

3 changed files with 24 additions and 11 deletions
@@ -556,7 +556,7 @@ class PrismaClient:
                 where={"token": token},  # type: ignore
                 data={**db_data},  # type: ignore
             )
-            print_verbose(
+            verbose_proxy_logger.debug(
                 "\033[91m"
                 + f"DB Token Table update succeeded {response}"
                 + "\033[0m"
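The hunk above swaps the proxy's print-based print_verbose helper for a module logger. A minimal sketch of the pattern, assuming verbose_proxy_logger is an ordinary stdlib logging.Logger (the diff itself does not show its definition, and the logger name below is a guess):

import logging

# Stand-in for litellm's verbose_proxy_logger, assumed here to be a plain
# logging.Logger, so debug output is gated by log level rather than a flag.
verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")

def log_db_update(response):
    # Hypothetical helper reproducing the message from the hunk above,
    # wrapped in ANSI red ("\033[91m" ... "\033[0m").
    verbose_proxy_logger.debug(
        "\033[91m" + f"DB Token Table update succeeded {response}" + "\033[0m"
    )

logging.basicConfig(level=logging.DEBUG)
log_db_update({"spend": 0.0})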
@@ -2938,17 +2938,25 @@ def cost_per_token(
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model_with_provider in model_cost_ref:
-        print_verbose(f"Looking up model={model_with_provider} in model_cost_map")
+        verbose_logger.debug(
+            f"Looking up model={model_with_provider} in model_cost_map"
+        )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["output_cost_per_token"]
             * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-3.5-turbo" in model:
-        print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
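For orientation, the model_cost_ref branch above is a dictionary lookup followed by two multiplications. A self-contained miniature of that logic with invented prices (the real values live in litellm's model cost map; the numbers below are illustrative only):

# Illustrative miniature of the model_cost_ref lookup in cost_per_token;
# the prices are made up for the example, not litellm's real values.
model_cost_ref = {
    "azure/gpt-35-turbo": {
        "input_cost_per_token": 1.5e-06,
        "output_cost_per_token": 2.0e-06,
    },
}

def cost_for(model, prompt_tokens, completion_tokens):
    entry = model_cost_ref[model]
    prompt_cost = entry["input_cost_per_token"] * prompt_tokens
    completion_cost = entry["output_cost_per_token"] * completion_tokens
    return prompt_cost, completion_cost

print(cost_for("azure/gpt-35-turbo", prompt_tokens=19, completion_tokens=31))
# approx (2.85e-05, 6.2e-05)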
@@ -2959,17 +2967,23 @@ def cost_per_token(
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_llms:
-        print_verbose(f"Cost Tracking: {model} is an Azure LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
         model = litellm.azure_llms[model]
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model]["output_cost_per_token"] * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_embedding_models:
-        print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
         model = litellm.azure_embedding_models[model]
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
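One detail in the Azure branches above: the incoming model name is remapped through litellm.azure_llms (or litellm.azure_embedding_models) before the price lookup. A hypothetical sketch of that step, with an assumed mapping shape and a single invented entry:

# Assumed shape of litellm.azure_llms: Azure deployment name -> cost-map key.
azure_llms = {"gpt-35-turbo": "azure/gpt-35-turbo"}  # illustrative entry

model = "gpt-35-turbo"
if model in azure_llms:
    model = azure_llms[model]  # canonical key used for the price lookup
print(model)  # azure/gpt-35-turbo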
@@ -2,7 +2,7 @@
 ## Tests /key endpoints.

 import pytest
-import asyncio
+import asyncio, time
 import aiohttp
 from openai import AsyncOpenAI
 import sys, os
@@ -95,11 +95,10 @@ async def chat_completion(session, key, model="gpt-4"):
 async def chat_completion_streaming(session, key, model="gpt-4"):
     client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
     messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"},
+        {"role": "system", "content": "You are a helpful assistant"},
+        {"role": "user", "content": f"Hello! {time.time()}"},
     ]
     prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
-    assert prompt_tokens == 19
     data = {
         "model": model,
         "messages": messages,
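This edit is what the `import asyncio, time` change earlier supports: embedding time.time() in the prompt makes its token count vary from run to run, which is why the hard-coded `assert prompt_tokens == 19` is dropped. A small offline sketch of the counting call the test keeps:

import time
import litellm

# Same call shape as in the test above; the timestamp makes the count
# run-dependent, which is exactly why the fixed assertion was removed.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": f"Hello! {time.time()}"},
]
prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
print(prompt_tokens)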
@@ -114,7 +113,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"):
     print(f"content: {content}")

     completion_tokens = litellm.token_counter(
-        model="azure/gpt-35-turbo", text=content, count_response_tokens=True
+        model="gpt-35-turbo", text=content, count_response_tokens=True
     )

     return prompt_tokens, completion_tokens
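This hunk carries the commit's headline fix: the response-side count now uses the plain gpt-35-turbo name instead of the provider-prefixed azure/gpt-35-turbo, matching the name already used for the prompt-side count. A usage sketch of the corrected call (the sample content string is invented):

import litellm

# Counting tokens in raw response text, mirroring the corrected call above.
content = "Hello! How can I assist you today?"
completion_tokens = litellm.token_counter(
    model="gpt-35-turbo", text=content, count_response_tokens=True
)
print(completion_tokens)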
@@ -251,7 +250,7 @@ async def test_key_info_spend_values():
     )
     print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
     prompt_cost, completion_cost = litellm.cost_per_token(
-        model="gpt-35-turbo",
+        model="azure/gpt-35-turbo",
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
     )
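Conversely, the spend check switches to the provider-prefixed name, since litellm.cost_per_token resolves azure/gpt-35-turbo through the provider-aware branches shown in the earlier hunks. A usage sketch with illustrative token counts:

import litellm

# Same signature as the call in the test; the token counts are illustrative.
prompt_cost, completion_cost = litellm.cost_per_token(
    model="azure/gpt-35-turbo",
    prompt_tokens=19,
    completion_tokens=31,
)
print(f"prompt_cost={prompt_cost}, completion_cost={completion_cost}")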