mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
fix(utils.py): fix openai-like api response format parsing (#7273)
* fix(utils.py): fix openai-like api response format parsing. Fixes an issue when passing structured output to the litellm_proxy/ route.
* fix(cost_calculator.py): fix whisper transcription cost calculation to use the file duration, not the response time.
* test: skip test if credentials are not found.
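The cost_calculator.py fix is the substance of the diff below: per-second audio models (e.g. whisper transcription) are priced on the length of the audio, not on how long the API call took to return. A minimal sketch of the behavioral difference, using made-up numbers (the $0.0001-per-second rate and durations are assumptions for illustration, not values from litellm's pricing map):

# Illustration only: rate and durations are assumed, not litellm pricing data.
output_cost_per_second = 0.0001  # USD per second of audio (assumed rate)
audio_duration = 60.0            # seconds of audio in the transcribed file
response_time_ms = 3000.0        # wall-clock latency of the API call

# Before the fix: billed on request latency (3 s of a 60 s file).
old_cost = output_cost_per_second * response_time_ms / 1000   # 0.0003 USD

# After the fix: billed on the audio file's duration.
new_cost = output_cost_per_second * audio_duration            # 0.006 USD

print(f"old={old_cost:.4f} USD, new={new_cost:.4f} USD")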
This commit is contained in:
parent 8212af0ac1
commit f628290ce7
6 changed files with 134 additions and 90 deletions
cost_calculator.py

@@ -78,36 +78,44 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
 def cost_per_second(
-    model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
+    model: str, custom_llm_provider: Optional[str], duration: float = 0.0
 ) -> Tuple[float, float]:
     """
     Calculates the cost per second for a given model, prompt tokens, and completion tokens.
 
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, the custom llm provider
+        - duration: float, the duration of the response in seconds
+
     Returns:
         Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
     """
     ## GET MODEL INFO
-    model_info = get_model_info(model=model, custom_llm_provider="openai")
+    model_info = get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider or "openai"
+    )
     prompt_cost = 0.0
     completion_cost = 0.0
     ## Speech / Audio cost calculation
     if (
         "output_cost_per_second" in model_info
         and model_info["output_cost_per_second"] is not None
-        and response_time_ms is not None
     ):
         verbose_logger.debug(
-            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
         )
         ## COST PER SECOND ##
-        completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
+        completion_cost = model_info["output_cost_per_second"] * duration
     elif (
         "input_cost_per_second" in model_info
         and model_info["input_cost_per_second"] is not None
-        and response_time_ms is not None
     ):
         verbose_logger.debug(
-            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
+            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
         )
         ## COST PER SECOND ##
-        prompt_cost = model_info["input_cost_per_second"] * response_time_ms / 1000
+        prompt_cost = model_info["input_cost_per_second"] * duration
         completion_cost = 0.0
 
     return prompt_cost, completion_cost
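With the new signature, callers pass the provider and the measured file duration directly instead of a wall-clock response time. A hedged usage sketch (the import path, model name, and values are assumptions for illustration; actual pricing depends on litellm's model map):

from litellm.cost_calculator import cost_per_second  # path assumed from the diff above

# e.g. pricing a 90-second whisper transcription; values are illustrative
prompt_cost, completion_cost = cost_per_second(
    model="whisper-1",
    custom_llm_provider=None,  # falls back to "openai" inside get_model_info
    duration=90.0,             # seconds of audio, not request latency
)
print(prompt_cost, completion_cost)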