fix(utils.py): fix openai-like api response format parsing (#7273)

* fix(utils.py): fix openai-like api response format parsing

  Fixes issue passing structured output to litellm_proxy/ route

* fix(cost_calculator.py): fix whisper transcription cost calc to use file duration, not response time

* test: skip test if credentials not found
2025-04-27 11:43:54 +00:00 · 2024-12-17 12:49:09 -08:00 · 2024-12-17 12:49:09 -08:00 · f628290ce7
commit f628290ce7
parent 8212af0ac1
6 changed files with 134 additions and 90 deletions
--- a/litellm/llms/openai/cost_calculation.py
+++ b/litellm/llms/openai/cost_calculation.py
@@ -78,36 +78,44 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:


 def cost_per_second(
-    model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
+    model: str, custom_llm_provider: Optional[str], duration: float = 0.0
 ) -> Tuple[float, float]:
    """
    Calculates the cost per second for a given model, prompt tokens, and completion tokens.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, the custom llm provider
+        - duration: float, the duration of the response in seconds
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## GET MODEL INFO
-    model_info = get_model_info(model=model, custom_llm_provider="openai")
+    model_info = get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider or "openai"
+    )
    prompt_cost = 0.0
    completion_cost = 0.0
    ## Speech / Audio cost calculation
    if (
        "output_cost_per_second" in model_info
        and model_info["output_cost_per_second"] is not None
-        and response_time_ms is not None
    ):
        verbose_logger.debug(
-            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
        )
        ## COST PER SECOND ##
-        completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
+        completion_cost = model_info["output_cost_per_second"] * duration
    elif (
        "input_cost_per_second" in model_info
        and model_info["input_cost_per_second"] is not None
-        and response_time_ms is not None
    ):
        verbose_logger.debug(
-            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
+            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
        )
        ## COST PER SECOND ##
-        prompt_cost = model_info["input_cost_per_second"] * response_time_ms / 1000
+        prompt_cost = model_info["input_cost_per_second"] * duration
        completion_cost = 0.0

    return prompt_cost, completion_cost