mirror of https://github.com/BerriAI/litellm.git
Cost tracking improvements (#5828)
* feat(litellm_logging.py): update the standard logging payload to include debug information for cost-tracking failures. Also fixes cohere rerank cost tracking and databricks llama2 model cost tracking. Makes cost failures easier to reproduce and improves reliability in production.

* fix(proxy_server.py): emit cost-failure debug info to slack alerting. Improves the debug information available on slack alerts when cost tracking fails.
parent 8039b95aaf
commit 2488e4b45f

6 changed files with 117 additions and 45 deletions
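For context on the pricing rule the diff below implements: Cohere-style rerank endpoints bill per search unit rather than per token, and one request counts as one search unit unless the response says otherwise. A minimal standalone sketch of that rule (PRICE_PER_SEARCH_UNIT_USD is a made-up placeholder; litellm reads real rates from its model cost map):

# Illustration only, not litellm code. The rate constant is a hypothetical
# placeholder.
from typing import Optional

PRICE_PER_SEARCH_UNIT_USD = 0.002


def rerank_request_cost(search_units: Optional[int]) -> float:
    # Cohere charges per request (one search unit) when no count is billed.
    return (search_units or 1) * PRICE_PER_SEARCH_UNIT_USD


print(rerank_request_cost(None))  # 0.002 -- defaults to one unit per request
print(rerank_request_cost(3))     # cost for three billed search units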
@@ -250,6 +250,13 @@ def cost_per_token(
             )
         )
         return prompt_cost, completion_cost
+    elif call_type == "arerank" or call_type == "rerank":
+        completion_tokens_cost_usd_dollar = rerank_cost(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+        )
+        prompt_tokens_cost_usd_dollar = 0
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
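Note the shape of the return value in the new branch: cost_per_token always hands back a (prompt_cost, completion_cost) pair, so rerank, which has no prompt/completion token split, carries its whole charge on the completion side with a zero prompt cost. A toy dispatcher showing the same contract (names and rate are assumptions, not litellm's API):

# Toy sketch; PER_UNIT_USD is a placeholder rate, not a real price.
from typing import Tuple

PER_UNIT_USD = 0.002


def cost_pair(call_type: str, search_units: int = 1) -> Tuple[float, float]:
    if call_type in ("rerank", "arerank"):
        return (0.0, search_units * PER_UNIT_USD)  # no prompt-side cost
    return (0.0, 0.0)  # other call types are priced by other branches


print(cost_pair("rerank"))  # (0.0, 0.002)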
@@ -689,7 +696,18 @@ def completion_cost(
             call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
         ):
             prompt_characters = litellm.utils._count_characters(text=prompt)
+        elif (
+            call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
+        ):
+            if completion_response is not None and isinstance(
+                completion_response, RerankResponse
+            ):
+                meta_obj = completion_response.meta
+                billed_units = meta_obj.get("billed_units", {}) or {}
+                search_units = (
+                    billed_units.get("search_units") or 1
+                )  # cohere charges per request by default.
+                completion_tokens = search_units

         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
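The defensive defaults in this branch are worth spelling out: `or {}` covers a provider returning `billed_units` as an explicit null, and `or 1` treats a missing (or zero) `search_units` as one billable request. The same logic re-run on a plain dict standing in for the response's meta object:

# Plain-dict re-run of the extraction above; `meta` is a stand-in for a
# parsed RerankResponse.meta payload.
meta = {"billed_units": None}  # key present, value null

billed_units = meta.get("billed_units", {}) or {}  # None -> {}
search_units = billed_units.get("search_units") or 1  # missing or 0 -> 1
completion_tokens = search_units

print(completion_tokens)  # 1 -- billed as a single request by default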
@@ -794,7 +812,7 @@ def response_cost_calculator(
 ) -> Optional[float]:
     """
     Returns
-    - float or None: cost of response OR none if error.
+    - float or None: cost of response
     """
     try:
         response_cost: float = 0.0
@@ -810,15 +828,6 @@ def response_cost_calculator(
                 call_type=call_type,
                 custom_llm_provider=custom_llm_provider,
             )
-        elif isinstance(response_object, RerankResponse) and (
-            call_type == "arerank" or call_type == "rerank"
-        ):
-            response_cost = rerank_cost(
-                rerank_response=response_object,
-                model=model,
-                call_type=call_type,
-                custom_llm_provider=custom_llm_provider,
-            )
         else:
             if custom_pricing is True:  # override defaults if custom pricing is set
                 base_model = model
@@ -831,24 +840,12 @@ def response_cost_calculator(
                 custom_llm_provider=custom_llm_provider,
             )
         return response_cost
-    except litellm.NotFoundError as e:
-        verbose_logger.debug(  # debug since it can be spammy in logs, for calls
-            f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map."
-        )
-        return None
     except Exception as e:
-        verbose_logger.debug(
-            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                str(e), traceback.format_exc()
-            )
-        )
-        return None
+        raise e


 def rerank_cost(
-    rerank_response: RerankResponse,
     model: str,
-    call_type: Literal["rerank", "arerank"],
     custom_llm_provider: Optional[str],
 ) -> float:
     """
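Two things happen in the last two hunks: the rerank special case is dropped from response_cost_calculator (redundant once cost_per_token learned to price rerank calls in the first hunk), and the except blocks that logged quietly and returned None now re-raise. That re-raise is what lets the standard logging payload upstream capture cost-failure details. A sketch of the caller-side effect, with a hypothetical wrapper standing in for litellm's logging layer:

# Hypothetical caller, illustration only: once the calculator raises instead
# of returning None, the layer above can record the full traceback.
import traceback
from typing import Callable, Optional


def record_cost(calculate: Callable[[], float]) -> Optional[float]:
    try:
        return calculate()
    except Exception:
        # Previously this detail was swallowed inside the calculator; now the
        # caller decides where the debug info goes (logs, slack alerts, ...).
        print(traceback.format_exc())
        return None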