diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 2a539dcd7..3223a3c15 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -863,7 +863,7 @@ async def model_info(request: Request):
         model_name = m["model_name"]
         model_params = {}
         for k,v in m["litellm_params"].items():
-            if k == "api_key": # don't send the api key
+            if k == "api_key" or k == "api_base": # don't send the api key or api base
                 continue
             if k == "model":
diff --git a/litellm/utils.py b/litellm/utils.py
index 50202071a..2f07f8dfd 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1660,8 +1660,6 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
         "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
         "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
     }
-    if "azure/" in model:
-        model = model.replace("azure/", "")
     if model in model_cost_ref:
         prompt_tokens_cost_usd_dollar = (
             model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
@@ -1741,7 +1739,7 @@ def completion_cost(
    # Handle Inputs to completion_cost
    prompt_tokens = 0
    completion_tokens = 0
-    if completion_response != None:
+    if completion_response is not None:
        # get input/output tokens from completion_response
        prompt_tokens = completion_response['usage']['prompt_tokens']
        completion_tokens = completion_response['usage']['completion_tokens']
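
For context, a minimal standalone sketch of the filtering behavior the first hunk introduces: after this patch, model_info drops both "api_key" and "api_base" before returning a model's params. The helper name sanitize_litellm_params and the sample dict are illustrative only, not part of the patch.

# Sketch (assumed helper, mirrors the loop in model_info after this change):
def sanitize_litellm_params(litellm_params: dict) -> dict:
    model_params = {}
    for k, v in litellm_params.items():
        if k == "api_key" or k == "api_base":  # don't send the api key or api base
            continue
        model_params[k] = v
    return model_params

print(sanitize_litellm_params({
    "model": "azure/gpt-35-turbo",
    "api_key": "REDACTED",
    "api_base": "https://example.openai.azure.com",
}))
# -> {'model': 'azure/gpt-35-turbo'}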