From 19fb24cd1555b8fa3bd2bc250d7ce42eefa1ac33 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 23 Nov 2023 21:41:36 -0800 Subject: [PATCH] (feat) cost tracking for azure llms --- litellm/llms/azure.py | 4 +++- litellm/tests/test_get_model_cost_map.py | 2 +- litellm/utils.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py index 660617f0cc..c89f10c2cc 100644 --- a/litellm/llms/azure.py +++ b/litellm/llms/azure.py @@ -157,6 +157,7 @@ class AzureChatCompletion(BaseLLM): azure_client_params["azure_ad_token"] = azure_ad_token azure_client = AzureOpenAI(**azure_client_params) response = azure_client.chat.completions.create(**data) # type: ignore + response.model = "azure/" + str(response.model) return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response) except AzureOpenAIError as e: exception_mapping_worked = True @@ -193,6 +194,7 @@ class AzureChatCompletion(BaseLLM): azure_client_params["azure_ad_token"] = azure_ad_token azure_client = AsyncAzureOpenAI(**azure_client_params) response = await azure_client.chat.completions.create(**data) + response.model = "azure/" + str(response.model) return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response) except Exception as e: if isinstance(e,httpx.TimeoutException): @@ -335,7 +337,7 @@ class AzureChatCompletion(BaseLLM): ) model_response["object"] = "list" model_response["data"] = output_data - model_response["model"] = model + model_response["model"] = "azure/" + model model_response["usage"] = embedding_response["usage"] return model_response except AzureOpenAIError as e: diff --git a/litellm/tests/test_get_model_cost_map.py b/litellm/tests/test_get_model_cost_map.py index a6a7b8553e..b7763da12a 100644 --- a/litellm/tests/test_get_model_cost_map.py +++ b/litellm/tests/test_get_model_cost_map.py 
@@ -68,7 +68,7 @@ def test_cost_azure_gpt_35(): id='chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac', choices=[Choices(finish_reason=None, index=0, message=Message(content=' Sure! Here is a short poem about the sky:\n\nA canvas of blue, a', role='assistant'))], - model='chatGPT-deployment-LiteLLM-isAMAZING', + model='azure/gpt-35-turbo', # azure always has model written like this usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38) ) diff --git a/litellm/utils.py b/litellm/utils.py index e4229ac80b..f1cf524351 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1602,6 +1602,15 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0): prompt_tokens_cost_usd_dollar = 0 completion_tokens_cost_usd_dollar = 0 model_cost_ref = litellm.model_cost + + # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models + azure_llms = { + "gpt-35-turbo": "azure/gpt-3.5-turbo", + "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k", + "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct" + } + if "azure/" in model: + model = model.replace("azure/", "") if model in model_cost_ref: prompt_tokens_cost_usd_dollar = ( model_cost_ref[model]["input_cost_per_token"] * prompt_tokens @@ -1619,6 +1628,14 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0): model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in azure_llms: + model = azure_llms[model] + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) else: # calculate average input cost, azure/gpt-deployments can potentially go here if users don't specify, gpt-4, gpt-3.5-turbo. LLMs litellm knows input_cost_sum = 0