test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately

Addresses https://github.com/BerriAI/litellm/pull/10141#discussion_r2051555332
This commit is contained in:
Krrish Dholakia 2025-04-19 12:14:15 -07:00
parent 99db1b7690
commit dacc712522
3 changed files with 61 additions and 41 deletions

View file

@@ -265,9 +265,10 @@ def generic_cost_per_token(
) )
## CALCULATE OUTPUT COST ## CALCULATE OUTPUT COST
text_tokens = usage.completion_tokens text_tokens = 0
audio_tokens = 0 audio_tokens = 0
reasoning_tokens = 0 reasoning_tokens = 0
is_text_tokens_total = False
if usage.completion_tokens_details is not None: if usage.completion_tokens_details is not None:
audio_tokens = ( audio_tokens = (
cast( cast(
@@ -281,7 +282,7 @@ def generic_cost_per_token(
Optional[int], Optional[int],
getattr(usage.completion_tokens_details, "text_tokens", None), getattr(usage.completion_tokens_details, "text_tokens", None),
) )
or usage.completion_tokens # default to completion tokens, if this field is not set or 0 # default to completion tokens, if this field is not set
) )
reasoning_tokens = ( reasoning_tokens = (
cast( cast(
@@ -290,6 +291,11 @@ def generic_cost_per_token(
) )
or 0 or 0
) )
if text_tokens == 0:
text_tokens = usage.completion_tokens
if text_tokens == usage.completion_tokens:
is_text_tokens_total = True
## TEXT COST ## TEXT COST
completion_cost = float(text_tokens) * completion_base_cost completion_cost = float(text_tokens) * completion_base_cost
@@ -302,19 +308,21 @@ def generic_cost_per_token(
) )
## AUDIO COST ## AUDIO COST
if ( if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
_output_cost_per_audio_token is not None _output_cost_per_audio_token = (
and audio_tokens is not None _output_cost_per_audio_token
and audio_tokens > 0 if _output_cost_per_audio_token is not None
): else completion_base_cost
)
completion_cost += float(audio_tokens) * _output_cost_per_audio_token completion_cost += float(audio_tokens) * _output_cost_per_audio_token
## REASONING COST ## REASONING COST
if ( if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
_output_cost_per_reasoning_token is not None _output_cost_per_reasoning_token = (
and reasoning_tokens _output_cost_per_reasoning_token
and reasoning_tokens > 0 if _output_cost_per_reasoning_token is not None
): else completion_base_cost
)
completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token
return prompt_cost, completion_cost return prompt_cost, completion_cost

View file

@@ -4979,35 +4979,6 @@
"supports_tool_choice": true "supports_tool_choice": true
}, },
"gemini-2.5-pro-exp-03-25": { "gemini-2.5-pro-exp-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_token": 0,
"input_cost_per_token_above_200k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_token_above_200k_tokens": 0,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.5-pro-preview-03-25": {
"max_tokens": 65536, "max_tokens": 65536,
"max_input_tokens": 1048576, "max_input_tokens": 1048576,
"max_output_tokens": 65536, "max_output_tokens": 65536,

View file

@@ -26,6 +26,47 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_toke
from litellm.types.utils import Usage from litellm.types.utils import Usage
def test_reasoning_tokens_no_price_set():
    """When a model reports text + reasoning token details but has no separate
    reasoning-token price, the whole completion must be billed at the base
    output rate (i.e. text + reasoning together equal completion_tokens)."""
    model = "o1-mini"
    custom_llm_provider = "openai"
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    model_cost_map = litellm.model_cost[model]

    # Usage where the completion is split into text (626) and reasoning (952)
    # tokens that sum to the completion_tokens total (1578).
    usage = Usage(
        completion_tokens=1578,
        prompt_tokens=17,
        total_tokens=1595,
        completion_tokens_details=CompletionTokensDetailsWrapper(
            accepted_prediction_tokens=None,
            audio_tokens=None,
            reasoning_tokens=952,
            rejected_prediction_tokens=None,
            text_tokens=626,
        ),
        prompt_tokens_details=PromptTokensDetailsWrapper(
            audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None
        ),
    )

    prompt_cost, completion_cost = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider="openai",
    )

    # Prompt side: plain per-token input pricing.
    expected_prompt_cost = model_cost_map["input_cost_per_token"] * usage.prompt_tokens
    assert round(prompt_cost, 10) == round(expected_prompt_cost, 10)

    print(f"completion_cost: {completion_cost}")
    # Completion side: no reasoning price configured, so every completion
    # token (text and reasoning alike) is charged once at the base rate.
    expected_completion_cost = (
        model_cost_map["output_cost_per_token"] * usage.completion_tokens
    )
    print(f"expected_completion_cost: {expected_completion_cost}")
    assert round(completion_cost, 10) == round(expected_completion_cost, 10)
def test_reasoning_tokens_gemini(): def test_reasoning_tokens_gemini():
model = "gemini-2.5-flash-preview-04-17" model = "gemini-2.5-flash-preview-04-17"
custom_llm_provider = "gemini" custom_llm_provider = "gemini"