diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 2a539dcd7..3223a3c15 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -863,7 +863,7 @@ async def model_info(request: Request):
         model_name = m["model_name"]
         model_params = {}
         for k,v in m["litellm_params"].items():
-            if k == "api_key": # don't send the api key
+            if k == "api_key" or k == "api_base": # don't send the api key or api base
                 continue
             if k == "model":
diff --git a/litellm/utils.py b/litellm/utils.py
index 50202071a..2f07f8dfd 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1660,8 +1660,6 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
         "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
         "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
     }
-    if "azure/" in model:
-        model = model.replace("azure/", "")
     if model in model_cost_ref:
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
@@ -1741,7 +1739,7 @@ def completion_cost(
    # Handle Inputs to completion_cost
    prompt_tokens = 0
    completion_tokens = 0
-    if completion_response != None:
+    if completion_response is not None:
        # get input/output tokens from completion_response
        prompt_tokens = completion_response['usage']['prompt_tokens']
        completion_tokens = completion_response['usage']['completion_tokens']
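
For context, a minimal standalone sketch of the filtering behavior the first hunk introduces: after this patch, model_info drops both "api_key" and "api_base" before returning a model's params. The helper name sanitize_litellm_params and the sample dict are illustrative only, not part of the patch.

# Sketch (assumed helper, mirrors the loop in model_info after this change):
def sanitize_litellm_params(litellm_params: dict) -> dict:
    model_params = {}
    for k, v in litellm_params.items():
        if k == "api_key" or k == "api_base":  # don't send the api key or api base
            continue
        model_params[k] = v
    return model_params

print(sanitize_litellm_params({
    "model": "azure/gpt-35-turbo",
    "api_key": "REDACTED",
    "api_base": "https://example.openai.azure.com",
}))
# -> {'model': 'azure/gpt-35-turbo'}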