Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
(feat) use custom_llm_provider in completion_cost

commit 53fd62b0cd (parent 70426cad76)
1 changed file with 22 additions and 1 deletion
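In short: cost_per_token() gains an optional custom_llm_provider argument, and when the bare model name misses in litellm.model_cost the lookup is retried under the provider-prefixed key custom_llm_provider + "/" + model; completion_cost() now pulls that provider out of the response's _hidden_params and forwards it. A minimal sketch of the new lookup path, assuming a made-up model name and made-up per-token prices (not real entries in litellm's cost map):

    import litellm
    from litellm import cost_per_token

    # Hypothetical cost-map entry; the key and prices are invented for illustration.
    litellm.model_cost["openrouter/some-model"] = {
        "input_cost_per_token": 1e-06,
        "output_cost_per_token": 2e-06,
    }

    # "some-model" alone is not in the map, but with custom_llm_provider set,
    # cost_per_token also tries the "openrouter/some-model" key and matches it.
    prompt_cost, completion_cost_usd = cost_per_token(
        model="some-model",
        prompt_tokens=100,
        completion_tokens=50,
        custom_llm_provider="openrouter",
    )
    print(prompt_cost, completion_cost_usd)  # 0.0001 0.0001 with the prices above

Note that as committed, the prefixed key is built unconditionally (custom_llm_provider + "/" + model), so a direct cost_per_token call like this one must pass a string provider; leaving the None default would fail at that concatenation.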
litellm/utils.py

@@ -2780,7 +2780,9 @@ def token_counter(
     return num_tokens


-def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
+def cost_per_token(
+    model="", prompt_tokens=0, completion_tokens=0, custom_llm_provider=None
+):
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.

@@ -2796,6 +2798,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
     prompt_tokens_cost_usd_dollar = 0
     completion_tokens_cost_usd_dollar = 0
     model_cost_ref = litellm.model_cost
+    model_with_provider = custom_llm_provider + "/" + model
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")

@@ -2807,6 +2810,16 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0):
             model_cost_ref[model]["output_cost_per_token"] * completion_tokens
         )
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    elif model_with_provider in model_cost_ref:
+        print_verbose(f"Looking up model={model_with_provider} in model_cost_map")
+        prompt_tokens_cost_usd_dollar = (
+            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
+        )
+        completion_tokens_cost_usd_dollar = (
+            model_cost_ref[model_with_provider]["output_cost_per_token"]
+            * completion_tokens
+        )
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif "ft:gpt-3.5-turbo" in model:
         print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
         # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
@@ -2890,6 +2903,7 @@ def completion_cost(
         # Handle Inputs to completion_cost
         prompt_tokens = 0
         completion_tokens = 0
+        custom_llm_provider = None
         if completion_response is not None:
             # get input/output tokens from completion_response
             prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
@@ -2899,6 +2913,12 @@ def completion_cost(
             model = (
                 model or completion_response["model"]
             )  # check if user passed an override for model, if it's none check completion_response['model']
+            if completion_response is not None and hasattr(
+                completion_response, "_hidden_params"
+            ):
+                custom_llm_provider = completion_response._hidden_params.get(
+                    "custom_llm_provider", ""
+                )
         else:
             if len(messages) > 0:
                 prompt_tokens = token_counter(model=model, messages=messages)
@@ -2926,6 +2946,7 @@ def completion_cost(
             model=model,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
+            custom_llm_provider=custom_llm_provider,
         )
         return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
     except Exception as e:
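On the completion_cost() side, the provider now rides along in the response's _hidden_params. A sketch of that path, using a plain dict subclass as a stand-in for the response object (the diff shows completion_cost using dict-style "usage"/"model" access plus a _hidden_params attribute, which the stand-in mimics); the model name and prices are the same invented ones as above:

    import litellm
    from litellm import completion_cost

    class FakeResponse(dict):
        # Stand-in for a litellm response: dict-style access plus _hidden_params.
        pass

    # Hypothetical cost-map entry, as in the earlier sketch.
    litellm.model_cost["openrouter/some-model"] = {
        "input_cost_per_token": 1e-06,
        "output_cost_per_token": 2e-06,
    }

    resp = FakeResponse(
        model="some-model",
        usage={"prompt_tokens": 100, "completion_tokens": 50},
    )
    resp._hidden_params = {"custom_llm_provider": "openrouter"}

    # completion_cost reads the provider from _hidden_params, forwards it to
    # cost_per_token, and sums both sides: 100*1e-06 + 50*2e-06 = 0.0002.
    print(completion_cost(completion_response=resp, model="some-model"))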