forked from phoenix/litellm-mirror
(feat) cost tracking for azure llms
This commit is contained in:
parent
9a44433844
commit
19fb24cd15
3 changed files with 21 additions and 2 deletions
|
@ -157,6 +157,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
azure_client = AzureOpenAI(**azure_client_params)
|
||||
response = azure_client.chat.completions.create(**data) # type: ignore
|
||||
response.model = "azure/" + str(response.model)
|
||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||
except AzureOpenAIError as e:
|
||||
exception_mapping_worked = True
|
||||
|
@ -193,6 +194,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||
azure_client = AsyncAzureOpenAI(**azure_client_params)
|
||||
response = await azure_client.chat.completions.create(**data)
|
||||
response.model = "azure/" + str(response.model)
|
||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||
except Exception as e:
|
||||
if isinstance(e,httpx.TimeoutException):
|
||||
|
@ -335,7 +337,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
)
|
||||
model_response["object"] = "list"
|
||||
model_response["data"] = output_data
|
||||
model_response["model"] = model
|
||||
model_response["model"] = "azure/" + model
|
||||
model_response["usage"] = embedding_response["usage"]
|
||||
return model_response
|
||||
except AzureOpenAIError as e:
|
||||
|
|
|
@ -68,7 +68,7 @@ def test_cost_azure_gpt_35():
|
|||
id='chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac',
|
||||
choices=[Choices(finish_reason=None, index=0,
|
||||
message=Message(content=' Sure! Here is a short poem about the sky:\n\nA canvas of blue, a', role='assistant'))],
|
||||
model='chatGPT-deployment-LiteLLM-isAMAZING',
|
||||
model='azure/gpt-35-turbo', # azure always has model written like this
|
||||
usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38)
|
||||
)
|
||||
|
||||
|
|
|
@ -1602,6 +1602,15 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
|
|||
prompt_tokens_cost_usd_dollar = 0
|
||||
completion_tokens_cost_usd_dollar = 0
|
||||
model_cost_ref = litellm.model_cost
|
||||
|
||||
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
|
||||
azure_llms = {
|
||||
"gpt-35-turbo": "azure/gpt-3.5-turbo",
|
||||
"gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
|
||||
"gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
|
||||
}
|
||||
if "azure/" in model:
|
||||
model = model.replace("azure/", "")
|
||||
if model in model_cost_ref:
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||
|
@ -1619,6 +1628,14 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
|
|||
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
|
||||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
elif model in azure_llms:
|
||||
model = azure_llms[model]
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||
)
|
||||
completion_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||
)
|
||||
else:
|
||||
# calculate average input cost, azure/gpt-deployments can potentially go here if users don't specify, gpt-4, gpt-3.5-turbo. LLMs litellm knows
|
||||
input_cost_sum = 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue