diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 2b34acceb..9aef0304c 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -556,7 +556,7 @@ class PrismaClient:
                 where={"token": token},  # type: ignore
                 data={**db_data},  # type: ignore
             )
-            print_verbose(
+            verbose_proxy_logger.debug(
                 "\033[91m"
                 + f"DB Token Table update succeeded {response}"
                 + "\033[0m"
diff --git a/litellm/utils.py b/litellm/utils.py
index cca8bc85e..7a6b12a82 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2938,17 +2938,25 @@ def cost_per_token(
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model_with_provider in model_cost_ref:
-        print_verbose(f"Looking up model={model_with_provider} in model_cost_map")
+        verbose_logger.debug(
+            f"Looking up model={model_with_provider} in model_cost_map"
+        )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model_with_provider]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model_with_provider]["output_cost_per_token"]
             * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-3.5-turbo" in model:
-        print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
@@ -2959,17 +2967,23 @@
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_llms:
-        print_verbose(f"Cost Tracking: {model} is an Azure LLM")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
         model = litellm.azure_llms[model]
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+        )
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
         )
+        verbose_logger.debug(
+            f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+        )
         completion_tokens_cost_usd_dollar = (
             model_cost_ref[model]["output_cost_per_token"] * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in litellm.azure_embedding_models:
-        print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model")
+        verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
         model = litellm.azure_embedding_models[model]
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
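Reviewer note: the new cost-lookup debug lines only appear when the logger is at DEBUG level. A minimal sketch of exercising the patched branches, assuming litellm's verbose_logger is the standard logging.Logger defined in litellm._logging (the handler setup below is illustrative, not litellm's canonical configuration):

    import logging

    import litellm
    from litellm._logging import verbose_logger

    # Route debug records somewhere visible; litellm's own handler wiring may differ.
    logging.basicConfig(level=logging.DEBUG)
    verbose_logger.setLevel(logging.DEBUG)

    # Exercise the Azure pricing branch patched above; token counts are illustrative.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="azure/gpt-35-turbo",
        prompt_tokens=19,
        completion_tokens=25,
    )
    print(f"prompt_cost={prompt_cost}, completion_cost={completion_cost}")
    # Expected debug output (values come from litellm's bundled model cost map):
    #   applying cost=<input_cost_per_token> for prompt_tokens=19
    #   applying cost=<output_cost_per_token> for completion_tokens=25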
diff --git a/tests/test_keys.py b/tests/test_keys.py
index a0bf7387d..917c50823 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -2,7 +2,7 @@
 ## Tests /key endpoints.
 
 import pytest
-import asyncio
+import asyncio, time
 import aiohttp
 from openai import AsyncOpenAI
 import sys, os
@@ -95,11 +95,10 @@ async def chat_completion(session, key, model="gpt-4"):
 async def chat_completion_streaming(session, key, model="gpt-4"):
     client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
     messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Hello!"},
+        {"role": "system", "content": "You are a helpful assistant"},
+        {"role": "user", "content": f"Hello! {time.time()}"},
     ]
     prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
-    assert prompt_tokens == 19
     data = {
         "model": model,
         "messages": messages,
@@ -114,7 +113,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"):
     print(f"content: {content}")
 
     completion_tokens = litellm.token_counter(
-        model="azure/gpt-35-turbo", text=content, count_response_tokens=True
+        model="gpt-35-turbo", text=content, count_response_tokens=True
     )
 
     return prompt_tokens, completion_tokens
@@ -251,7 +250,7 @@ async def test_key_info_spend_values():
     )
     print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}")
     prompt_cost, completion_cost = litellm.cost_per_token(
-        model="gpt-35-turbo",
+        model="azure/gpt-35-turbo",
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
     )
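Reviewer note: the test now counts tokens with the bare gpt-35-turbo alias on both the prompt and response side, routes the cost lookup through the azure/ prefix, and salts the prompt with time.time(), presumably so repeated runs cannot hit a cached response; that salt also varies the prompt length, which is why the hard-coded assert prompt_tokens == 19 had to go. A minimal sketch of the accounting flow the updated test relies on (message contents mirror tests/test_keys.py; printed costs depend on the current model cost map, so treat the numbers as illustrative):

    import time

    import litellm

    messages = [
        {"role": "system", "content": "You are a helpful assistant"},
        # time.time() salts the prompt so each run sends a unique request
        {"role": "user", "content": f"Hello! {time.time()}"},
    ]

    # Token counting uses the bare model alias for both prompt and response.
    prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages)
    completion_tokens = litellm.token_counter(
        model="gpt-35-turbo", text="Hi there!", count_response_tokens=True
    )

    # The cost lookup goes through the azure/ prefix, matching the Azure
    # pricing branches patched in litellm/utils.py above.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="azure/gpt-35-turbo",
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
    )
    print(f"expected spend: {prompt_cost + completion_cost}")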