test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately (#10165)

* test(utils.py): handle the scenario where text tokens and reasoning tokens are both set, but reasoning tokens are not charged separately

  Addresses https://github.com/BerriAI/litellm/pull/10141#discussion_r2051555332

* fix(vertex_and_google_ai_studio.py): only set content if it is a non-empty str
2025-04-26 03:04:13 +00:00 · 2025-04-19 12:32:38 -07:00 · 2025-04-19 12:32:38 -07:00 · 03b5399f86
commit 03b5399f86
parent 99db1b7690
5 changed files with 90 additions and 49 deletions
--- a/litellm/litellm_core_utils/llm_cost_calc/utils.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -265,9 +265,10 @@ def generic_cost_per_token(
    )

    ## CALCULATE OUTPUT COST
-    text_tokens = usage.completion_tokens
+    text_tokens = 0
    audio_tokens = 0
    reasoning_tokens = 0
+    is_text_tokens_total = False
    if usage.completion_tokens_details is not None:
        audio_tokens = (
            cast(
@@ -281,7 +282,7 @@ def generic_cost_per_token(
                Optional[int],
                getattr(usage.completion_tokens_details, "text_tokens", None),
            )
-            or usage.completion_tokens  # default to completion tokens, if this field is not set
+            or 0  # default to 0 if this field is not set; falls back to completion tokens below
        )
        reasoning_tokens = (
            cast(
@@ -290,6 +291,11 @@ def generic_cost_per_token(
            )
            or 0
        )
+
+    if text_tokens == 0:
+        text_tokens = usage.completion_tokens
+    if text_tokens == usage.completion_tokens:
+        is_text_tokens_total = True
    ## TEXT COST
    completion_cost = float(text_tokens) * completion_base_cost

@@ -302,19 +308,21 @@ def generic_cost_per_token(
    )

    ## AUDIO COST
-    if (
-        _output_cost_per_audio_token is not None
-        and audio_tokens is not None
-        and audio_tokens > 0
-    ):
+    if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
+        _output_cost_per_audio_token = (
+            _output_cost_per_audio_token
+            if _output_cost_per_audio_token is not None
+            else completion_base_cost
+        )
        completion_cost += float(audio_tokens) * _output_cost_per_audio_token

    ## REASONING COST
-    if (
-        _output_cost_per_reasoning_token is not None
-        and reasoning_tokens
-        and reasoning_tokens > 0
-    ):
+    if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
+        _output_cost_per_reasoning_token = (
+            _output_cost_per_reasoning_token
+            if _output_cost_per_reasoning_token is not None
+            else completion_base_cost
+        )
        completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token

    return prompt_cost, completion_cost