Cost tracking improvements (#5828)

* feat(litellm_logging.py): update standard logging payload to include debug information for cost failures

Also includes fixes for Cohere rerank cost tracking and Databricks llama2 model cost tracking

Makes cost failures easier to reproduce and improves reliability in prod

* fix(proxy_server.py): emit cost failure debug info for slack alerting

Improves the debug information for cost tracking failures that is sent with Slack alerts
Krish Dholakia 2024-09-21 21:47:50 -07:00, committed by GitHub
parent 8039b95aaf
commit 2488e4b45f
6 changed files with 117 additions and 45 deletions
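
The "debug information for cost failures" in the first bullet refers to a structured payload attached to the standard logging object. As a rough illustration of what such a payload could carry, here is a minimal sketch; every field name below is an assumption for illustration, not a schema confirmed by this diff:

# Hypothetical sketch only: field names are assumptions, not litellm's schema.
from typing import Optional, TypedDict

class CostFailureDebugInfo(TypedDict, total=False):
    error_str: str                  # exception message from the cost calculator
    traceback_str: str              # full traceback, to make failures reproducible
    model: str                      # model whose cost lookup failed
    custom_llm_provider: Optional[str]
    call_type: str                  # e.g. "rerank", "completion"
    custom_pricing: Optional[bool]  # whether user-supplied pricing was in effect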


@@ -250,6 +250,13 @@ def cost_per_token(
             )
         )
         return prompt_cost, completion_cost
+    elif call_type == "arerank" or call_type == "rerank":
+        completion_tokens_cost_usd_dollar = rerank_cost(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+        )
+        prompt_tokens_cost_usd_dollar = 0
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
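
With this branch in place, rerank calls report their entire cost on the completion side of the (prompt, completion) tuple. A usage sketch, assuming the model name below exists in litellm's model cost map and that cost_per_token accepts these keywords with token counts defaulting to zero:

# Sketch: per-request rerank cost via cost_per_token; the model name is
# illustrative and must be present in litellm's model cost map.
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="rerank-english-v3.0",
    custom_llm_provider="cohere",
    call_type="rerank",
)
assert prompt_cost == 0  # rerank billing lands entirely on the completion side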
@@ -689,7 +696,18 @@ def completion_cost(
         call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
     ):
         prompt_characters = litellm.utils._count_characters(text=prompt)
+    elif (
+        call_type == CallTypes.rerank.value or call_type == CallTypes.arerank.value
+    ):
+        if completion_response is not None and isinstance(
+            completion_response, RerankResponse
+        ):
+            meta_obj = completion_response.meta
+            billed_units = meta_obj.get("billed_units", {}) or {}
+            search_units = (
+                billed_units.get("search_units") or 1
+            )  # cohere charges per request by default.
+            completion_tokens = search_units
     # Calculate cost based on prompt_tokens, completion_tokens
     if (
         "togethercomputer" in model
@@ -794,7 +812,7 @@ def response_cost_calculator(
 ) -> Optional[float]:
     """
     Returns
-    - float or None: cost of response OR none if error.
+    - float or None: cost of response
     """
     try:
         response_cost: float = 0.0
@@ -810,15 +828,6 @@
                 call_type=call_type,
                 custom_llm_provider=custom_llm_provider,
             )
-        elif isinstance(response_object, RerankResponse) and (
-            call_type == "arerank" or call_type == "rerank"
-        ):
-            response_cost = rerank_cost(
-                rerank_response=response_object,
-                model=model,
-                call_type=call_type,
-                custom_llm_provider=custom_llm_provider,
-            )
         else:
             if custom_pricing is True:  # override defaults if custom pricing is set
                 base_model = model
@@ -831,24 +840,12 @@
                 custom_llm_provider=custom_llm_provider,
             )
         return response_cost
-    except litellm.NotFoundError as e:
-        verbose_logger.debug(  # debug since it can be spammy in logs, for calls
-            f"Model={model} for LLM Provider={custom_llm_provider} not found in completion cost map."
-        )
-        return None
     except Exception as e:
-        verbose_logger.debug(
-            "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
-                str(e), traceback.format_exc()
-            )
-        )
-        return None
+        raise e


 def rerank_cost(
-    rerank_response: RerankResponse,
     model: str,
-    call_type: Literal["rerank", "arerank"],
     custom_llm_provider: Optional[str],
 ) -> float:
     """