From 03b5399f86d2ae6f61505952f27e12d67267172c Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Sat, 19 Apr 2025 12:32:38 -0700 Subject: [PATCH] =?UTF-8?q?test(utils.py):=20handle=20scenario=20where=20t?= =?UTF-8?q?ext=20tokens=20+=20reasoning=20tokens=20=E2=80=A6=20(#10165)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately Addresses https://github.com/BerriAI/litellm/pull/10141#discussion_r2051555332 * fix(vertex_and_google_ai_studio.py): only set content if non-empty str --- .../litellm_core_utils/llm_cost_calc/utils.py | 32 +++++++++------ .../vertex_and_google_ai_studio_gemini.py | 17 ++++---- ...odel_prices_and_context_window_backup.json | 29 ------------- .../llm_cost_calc/test_llm_cost_calc_utils.py | 41 +++++++++++++++++++ ...test_vertex_and_google_ai_studio_gemini.py | 20 +++++++++ 5 files changed, 90 insertions(+), 49 deletions(-) diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py index 15b8282820..48809fe856 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/utils.py +++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py @@ -265,9 +265,10 @@ def generic_cost_per_token( ) ## CALCULATE OUTPUT COST - text_tokens = usage.completion_tokens + text_tokens = 0 audio_tokens = 0 reasoning_tokens = 0 + is_text_tokens_total = False if usage.completion_tokens_details is not None: audio_tokens = ( cast( @@ -281,7 +282,7 @@ def generic_cost_per_token( Optional[int], getattr(usage.completion_tokens_details, "text_tokens", None), ) - or usage.completion_tokens # default to completion tokens, if this field is not set + or 0 # default to completion tokens, if this field is not set ) reasoning_tokens = ( cast( @@ -290,6 +291,11 @@ def generic_cost_per_token( ) or 0 ) + + if text_tokens == 0: + text_tokens = usage.completion_tokens + if 
text_tokens == usage.completion_tokens: + is_text_tokens_total = True ## TEXT COST completion_cost = float(text_tokens) * completion_base_cost @@ -302,19 +308,21 @@ def generic_cost_per_token( ) ## AUDIO COST - if ( - _output_cost_per_audio_token is not None - and audio_tokens is not None - and audio_tokens > 0 - ): + if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0: + _output_cost_per_audio_token = ( + _output_cost_per_audio_token + if _output_cost_per_audio_token is not None + else completion_base_cost + ) completion_cost += float(audio_tokens) * _output_cost_per_audio_token ## REASONING COST - if ( - _output_cost_per_reasoning_token is not None - and reasoning_tokens - and reasoning_tokens > 0 - ): + if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0: + _output_cost_per_reasoning_token = ( + _output_cost_per_reasoning_token + if _output_cost_per_reasoning_token is not None + else completion_base_cost + ) completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token return prompt_cost, completion_cost diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index ba57c8d225..d4c74f4910 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -587,14 +587,15 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig): _content_str += "data:{};base64,{}".format( part["inlineData"]["mimeType"], part["inlineData"]["data"] ) - if part.get("thought") is True: - if reasoning_content_str is None: - reasoning_content_str = "" - reasoning_content_str += _content_str - else: - if content_str is None: - content_str = "" - content_str += _content_str + if len(_content_str) > 0: + if part.get("thought") is True: + if reasoning_content_str is None: + reasoning_content_str = "" + reasoning_content_str += _content_str + 
else: + if content_str is None: + content_str = "" + content_str += _content_str return content_str, reasoning_content_str diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index b583de3b44..40d399aa68 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -4979,35 +4979,6 @@ "supports_tool_choice": true }, "gemini-2.5-pro-exp-03-25": { - "max_tokens": 65536, - "max_input_tokens": 1048576, - "max_output_tokens": 65536, - "max_images_per_prompt": 3000, - "max_videos_per_prompt": 10, - "max_video_length": 1, - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "input_cost_per_token_above_200k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_token_above_200k_tokens": 0, - "litellm_provider": "vertex_ai-language-models", - "mode": "chat", - "supports_system_messages": true, - "supports_function_calling": true, - "supports_vision": true, - "supports_audio_input": true, - "supports_video_input": true, - "supports_pdf_input": true, - "supports_response_schema": true, - "supports_tool_choice": true, - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" - }, - "gemini-2.5-pro-preview-03-25": { "max_tokens": 65536, "max_input_tokens": 1048576, "max_output_tokens": 65536, diff --git a/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py index ab501f5821..7df783e719 100644 --- a/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py +++ b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py @@ -26,6 +26,47 @@ from 
litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token from litellm.types.utils import Usage +def test_reasoning_tokens_no_price_set(): + model = "o1-mini" + custom_llm_provider = "openai" + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + model_cost_map = litellm.model_cost[model] + usage = Usage( + completion_tokens=1578, + prompt_tokens=17, + total_tokens=1595, + completion_tokens_details=CompletionTokensDetailsWrapper( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=952, + rejected_prediction_tokens=None, + text_tokens=626, + ), + prompt_tokens_details=PromptTokensDetailsWrapper( + audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None + ), + ) + prompt_cost, completion_cost = generic_cost_per_token( + model=model, + usage=usage, + custom_llm_provider="openai", + ) + assert round(prompt_cost, 10) == round( + model_cost_map["input_cost_per_token"] * usage.prompt_tokens, + 10, + ) + print(f"completion_cost: {completion_cost}") + expected_completion_cost = ( + model_cost_map["output_cost_per_token"] * usage.completion_tokens + ) + print(f"expected_completion_cost: {expected_completion_cost}") + assert round(completion_cost, 10) == round( + expected_completion_cost, + 10, + ) + + def test_reasoning_tokens_gemini(): model = "gemini-2.5-flash-preview-04-17" custom_llm_provider = "gemini" diff --git a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index bb00ff3ba0..0c6a95a97b 100644 --- a/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -239,3 +239,23 @@ def test_vertex_ai_thinking_output_part(): content, reasoning_content = v.get_assistant_content_message(parts=parts) assert content == "Hello world" assert
reasoning_content == "I'm thinking..." + + +def test_vertex_ai_empty_content(): + from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( + VertexGeminiConfig, + ) + from litellm.types.llms.vertex_ai import HttpxPartType + + v = VertexGeminiConfig() + parts = [ + HttpxPartType( + functionCall={ + "name": "get_current_weather", + "arguments": "{}", + }, + ), + ] + content, reasoning_content = v.get_assistant_content_message(parts=parts) + assert content is None + assert reasoning_content is None