fix(utils.py): fix custom pricing when litellm model != response obj model name

Krrish Dholakia 2024-05-13 15:24:56 -07:00
parent 1be6ea0c0d
commit b4a8665d11
4 changed files with 87 additions and 5 deletions

litellm/utils.py

@@ -1083,6 +1083,8 @@ class CallTypes(Enum):
 class Logging:
     global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, lunaryLogger
+    custom_pricing: bool = False
+
     def __init__(
         self,
         model,
@@ -1165,6 +1167,15 @@
             **additional_params,
         }
+        ## check if custom pricing set ##
+        if (
+            litellm_params.get("input_cost_per_token") is not None
+            or litellm_params.get("input_cost_per_second") is not None
+            or litellm_params.get("output_cost_per_token") is not None
+            or litellm_params.get("output_cost_per_second") is not None
+        ):
+            self.custom_pricing = True
+
     def _pre_call(self, input, api_key, model=None, additional_args={}):
         """
         Common helper function across the sync + async pre-call function
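
For context on where these litellm_params come from: a minimal sketch of a completion call that triggers the new check, assuming litellm forwards per-call custom-pricing kwargs into litellm_params as the hunk implies (the deployment name and prices below are made up):

import litellm

response = litellm.completion(
    model="azure/my-deployment",    # hypothetical Azure deployment alias
    messages=[{"role": "user", "content": "hey"}],
    input_cost_per_token=0.00002,   # any one of the four pricing kwargs
    output_cost_per_token=0.00006,  # flips self.custom_pricing to True
)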
@@ -1442,10 +1453,18 @@
                         )
                     )
                 else:
+                    base_model: Optional[str] = None
                     # check if base_model set on azure
                     base_model = _get_base_model_from_metadata(
                         model_call_details=self.model_call_details
                     )
+                    # litellm model name
+                    litellm_model = self.model_call_details["model"]
+                    if (
+                        litellm_model in litellm.model_cost
+                        and self.custom_pricing == True
+                    ):
+                        base_model = litellm_model
+
                     # base_model defaults to None if not set on model_info
                     self.model_call_details["response_cost"] = (
                         litellm.completion_cost(
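
This hunk is the core of the fix: a provider's response object can report a model name (for Azure, e.g. gpt-35-turbo) that differs from the litellm model name the call was made with, so a custom-priced entry keyed by the litellm name was never found. A hedged sketch of the scenario, using litellm.register_model() to add a custom-priced entry to litellm.model_cost (the model name and prices are assumptions for illustration):

import litellm

litellm.register_model(
    {
        "azure/chatgpt-v-2": {  # hypothetical litellm model name
            "input_cost_per_token": 0.00002,
            "output_cost_per_token": 0.00006,
            "litellm_provider": "azure",
            "mode": "chat",
        }
    }
)
# The provider's response may say model="gpt-35-turbo"; pinning base_model
# to the litellm model name keeps the cost lookup on the entry above.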
@@ -4365,7 +4384,7 @@ def completion_cost(
     size=None,
     quality=None,
     n=None,  # number of images
-):
+) -> float:
     """
     Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm.
@@ -4386,10 +4405,10 @@
     - If completion_response is not provided, the function calculates token counts based on the model and input text.
     - The cost is calculated based on the model, prompt tokens, and completion tokens.
     - For certain models containing "togethercomputer" in the name, prices are based on the model size.
-    - For Replicate models, the cost is calculated based on the total time used for the request.
+    - For un-mapped Replicate models, the cost is calculated based on the total time used for the request.

     Exceptions:
-    - If an error occurs during execution, the function returns 0.0 without blocking the user's execution path.
+    - If an error occurs during execution, the error is raised
     """
     try:
         if (
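
The -> float annotation pairs with the docstring change: completion_cost now raises on failure instead of silently returning 0.0, so callers that relied on the old fallback need their own guard. A usage sketch (the broad except is an assumption, since the diff does not show which exception type is raised):

try:
    cost = litellm.completion_cost(completion_response=response)
except Exception:
    cost = 0.0  # reinstate the old best-effort behavior where desired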
@@ -4701,6 +4720,10 @@ def get_litellm_params(
     acompletion=None,
     preset_cache_key=None,
     no_log=None,
+    input_cost_per_second=None,
+    input_cost_per_token=None,
+    output_cost_per_token=None,
+    output_cost_per_second=None,
 ):
     litellm_params = {
         "acompletion": acompletion,
@@ -4719,6 +4742,10 @@
         "preset_cache_key": preset_cache_key,
         "no-log": no_log,
         "stream_response": {},  # litellm_call_id: ModelResponse Dict
+        "input_cost_per_token": input_cost_per_token,
+        "input_cost_per_second": input_cost_per_second,
+        "output_cost_per_token": output_cost_per_token,
+        "output_cost_per_second": output_cost_per_second,
     }
     return litellm_params
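
A small round-trip sketch of the new parameters, assuming get_litellm_params() is called with the same per-call kwargs (values made up):

params = get_litellm_params(
    input_cost_per_token=0.00002,
    output_cost_per_token=0.00006,
)
assert params["input_cost_per_token"] == 0.00002
assert params["output_cost_per_second"] is None  # unset flags default to None
# The Logging.__init__ check in the second hunk above reads exactly these
# four keys to decide whether custom pricing applies.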