forked from phoenix/litellm-mirror
(feat) cost tracking for azure llms
This commit is contained in:
parent
9a44433844
commit
19fb24cd15
3 changed files with 21 additions and 2 deletions
|
@ -157,6 +157,7 @@ class AzureChatCompletion(BaseLLM):
|
||||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||||
azure_client = AzureOpenAI(**azure_client_params)
|
azure_client = AzureOpenAI(**azure_client_params)
|
||||||
response = azure_client.chat.completions.create(**data) # type: ignore
|
response = azure_client.chat.completions.create(**data) # type: ignore
|
||||||
|
response.model = "azure/" + str(response.model)
|
||||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||||
except AzureOpenAIError as e:
|
except AzureOpenAIError as e:
|
||||||
exception_mapping_worked = True
|
exception_mapping_worked = True
|
||||||
|
@ -193,6 +194,7 @@ class AzureChatCompletion(BaseLLM):
|
||||||
azure_client_params["azure_ad_token"] = azure_ad_token
|
azure_client_params["azure_ad_token"] = azure_ad_token
|
||||||
azure_client = AsyncAzureOpenAI(**azure_client_params)
|
azure_client = AsyncAzureOpenAI(**azure_client_params)
|
||||||
response = await azure_client.chat.completions.create(**data)
|
response = await azure_client.chat.completions.create(**data)
|
||||||
|
response.model = "azure" + str(response.model)
|
||||||
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if isinstance(e,httpx.TimeoutException):
|
if isinstance(e,httpx.TimeoutException):
|
||||||
|
@ -335,7 +337,7 @@ class AzureChatCompletion(BaseLLM):
|
||||||
)
|
)
|
||||||
model_response["object"] = "list"
|
model_response["object"] = "list"
|
||||||
model_response["data"] = output_data
|
model_response["data"] = output_data
|
||||||
model_response["model"] = model
|
model_response["model"] = "azure/" + model
|
||||||
model_response["usage"] = embedding_response["usage"]
|
model_response["usage"] = embedding_response["usage"]
|
||||||
return model_response
|
return model_response
|
||||||
except AzureOpenAIError as e:
|
except AzureOpenAIError as e:
|
||||||
|
|
|
@ -68,7 +68,7 @@ def test_cost_azure_gpt_35():
|
||||||
id='chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac',
|
id='chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac',
|
||||||
choices=[Choices(finish_reason=None, index=0,
|
choices=[Choices(finish_reason=None, index=0,
|
||||||
message=Message(content=' Sure! Here is a short poem about the sky:\n\nA canvas of blue, a', role='assistant'))],
|
message=Message(content=' Sure! Here is a short poem about the sky:\n\nA canvas of blue, a', role='assistant'))],
|
||||||
model='chatGPT-deployment-LiteLLM-isAMAZING',
|
model='azure/gpt-35-turbo', # Azure spells the model name "gpt-35-turbo" (no dot); litellm prefixes it with "azure/"
|
||||||
usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38)
|
usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -1602,6 +1602,15 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
|
||||||
prompt_tokens_cost_usd_dollar = 0
|
prompt_tokens_cost_usd_dollar = 0
|
||||||
completion_tokens_cost_usd_dollar = 0
|
completion_tokens_cost_usd_dollar = 0
|
||||||
model_cost_ref = litellm.model_cost
|
model_cost_ref = litellm.model_cost
|
||||||
|
|
||||||
|
# Azure OpenAI uses its own model names (e.g. "gpt-35-turbo", without the dot); see https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
|
||||||
|
azure_llms = {
|
||||||
|
"gpt-35-turbo": "azure/gpt-3.5-turbo",
|
||||||
|
"gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
|
||||||
|
"gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
|
||||||
|
}
|
||||||
|
if "azure/" in model:
|
||||||
|
model = model.replace("azure/", "")
|
||||||
if model in model_cost_ref:
|
if model in model_cost_ref:
|
||||||
prompt_tokens_cost_usd_dollar = (
|
prompt_tokens_cost_usd_dollar = (
|
||||||
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||||
|
@ -1619,6 +1628,14 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
|
||||||
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
|
model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
|
||||||
)
|
)
|
||||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
|
elif model in azure_llms:
|
||||||
|
model = azure_llms[model]
|
||||||
|
prompt_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||||
|
)
|
||||||
|
completion_tokens_cost_usd_dollar = (
|
||||||
|
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# calculate average input cost, azure/gpt-deployments can potentially go here if users don't specify, gpt-4, gpt-3.5-turbo. LLMs litellm knows
|
# calculate average input cost, azure/gpt-deployments can potentially go here if users don't specify, gpt-4, gpt-3.5-turbo. LLMs litellm knows
|
||||||
input_cost_sum = 0
|
input_cost_sum = 0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue