test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately (#10165)

* test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately

Addresses https://github.com/BerriAI/litellm/pull/10141#discussion_r2051555332

* fix(vertex_and_google_ai_studio.py): only set content if non-empty str
Author: Krish Dholakia · 2025-04-19 12:32:38 -07:00 · committed by GitHub
Parent: df57965f8f
Commit: 518a4900f6
5 changed files with 90 additions and 49 deletions
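
In short, the cost-calc change below makes generic_cost_per_token bill reasoning tokens at the base output rate whenever no dedicated reasoning price is configured, and skip the per-category reasoning charge entirely when text_tokens already equals the completion total, so nothing is double-billed. A minimal standalone sketch of that rule (not litellm's actual code; the rate is a made-up placeholder):

# Standalone sketch of the billing rule this commit implements; not
# litellm's actual code. BASE_OUTPUT_RATE is a made-up placeholder price.
BASE_OUTPUT_RATE = 4.4e-6


def output_cost(completion_tokens, text_tokens, reasoning_tokens,
                reasoning_rate=None):
    if not text_tokens:
        # Provider gave no text/reasoning breakdown: treat the completion
        # total as text and charge nothing per category on top of it.
        text_tokens = completion_tokens
    is_text_tokens_total = text_tokens == completion_tokens

    cost = text_tokens * BASE_OUTPUT_RATE
    if not is_text_tokens_total and reasoning_tokens:
        # No dedicated reasoning price -> fall back to the base output rate.
        cost += reasoning_tokens * (reasoning_rate or BASE_OUTPUT_RATE)
    return cost


# o1-mini case from the new test: 626 text + 952 reasoning == 1578 total,
# so the cost collapses to the flat per-token rate over all 1578 tokens.
assert round(output_cost(1578, 626, 952), 10) == round(1578 * BASE_OUTPUT_RATE, 10)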


@@ -265,9 +265,10 @@ def generic_cost_per_token(
     )
 
     ## CALCULATE OUTPUT COST
-    text_tokens = usage.completion_tokens
+    text_tokens = 0
     audio_tokens = 0
     reasoning_tokens = 0
+    is_text_tokens_total = False
     if usage.completion_tokens_details is not None:
         audio_tokens = (
             cast(
@@ -281,7 +282,7 @@ def generic_cost_per_token(
                 Optional[int],
                 getattr(usage.completion_tokens_details, "text_tokens", None),
             )
-            or usage.completion_tokens  # default to completion tokens, if this field is not set
+            or 0  # default to completion tokens, if this field is not set
         )
         reasoning_tokens = (
             cast(
@@ -290,6 +291,11 @@ def generic_cost_per_token(
             )
             or 0
         )
+
+    if text_tokens == 0:
+        text_tokens = usage.completion_tokens
+    if text_tokens == usage.completion_tokens:
+        is_text_tokens_total = True
 
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
@@ -302,19 +308,21 @@ def generic_cost_per_token(
         )
 
     ## AUDIO COST
-    if (
-        _output_cost_per_audio_token is not None
-        and audio_tokens is not None
-        and audio_tokens > 0
-    ):
+    if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
+        _output_cost_per_audio_token = (
+            _output_cost_per_audio_token
+            if _output_cost_per_audio_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(audio_tokens) * _output_cost_per_audio_token
 
     ## REASONING COST
-    if (
-        _output_cost_per_reasoning_token is not None
-        and reasoning_tokens
-        and reasoning_tokens > 0
-    ):
+    if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
+        _output_cost_per_reasoning_token = (
+            _output_cost_per_reasoning_token
+            if _output_cost_per_reasoning_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token
 
     return prompt_cost, completion_cost
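
To see what the new is_text_tokens_total flag buys, trace the two shapes of completion_tokens_details through the hunk above. Token counts come from the o1-mini test added below; the flat rate is a placeholder, not a real price:

rate = 4.4e-6  # placeholder flat output rate; no separate reasoning price set

# Shape 1: the provider breaks the completion down (the o1-mini test below).
# text_tokens=626 != completion_tokens=1578, so is_text_tokens_total=False
# and the reasoning branch bills 952 tokens at the fallback base rate:
cost_detailed = 626 * rate + 952 * rate   # == 1578 * rate, nothing lost

# Shape 2: only completion_tokens is set. text_tokens defaults to 1578,
# is_text_tokens_total=True, and the reasoning branch is skipped;
# charging the 952 reasoning tokens again would double-bill them:
cost_flat = 1578 * rate

assert round(cost_detailed, 10) == round(cost_flat, 10)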


@@ -587,14 +587,15 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 _content_str += "data:{};base64,{}".format(
                     part["inlineData"]["mimeType"], part["inlineData"]["data"]
                 )
-            if part.get("thought") is True:
-                if reasoning_content_str is None:
-                    reasoning_content_str = ""
-                reasoning_content_str += _content_str
-            else:
-                if content_str is None:
-                    content_str = ""
-                content_str += _content_str
+            if len(_content_str) > 0:
+                if part.get("thought") is True:
+                    if reasoning_content_str is None:
+                        reasoning_content_str = ""
+                    reasoning_content_str += _content_str
+                else:
+                    if content_str is None:
+                        content_str = ""
+                    content_str += _content_str
         return content_str, reasoning_content_str
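
The new length guard matters because Gemini parts that carry only a functionCall produce an empty _content_str; previously the else branch still initialized content_str to "", so tool-call-only responses surfaced empty-string content instead of None. A hedged standalone re-implementation of the fixed contract (text parts only, for brevity), with plain dicts standing in for HttpxPartType:

# Sketch of the post-fix contract, re-implemented standalone; not the
# library code itself. Plain dicts stand in for HttpxPartType, and only
# "text"/"thought" fields are handled (inlineData is omitted for brevity).
from typing import Optional, Tuple


def split_parts(parts) -> Tuple[Optional[str], Optional[str]]:
    content: Optional[str] = None
    reasoning: Optional[str] = None
    for part in parts:
        _content_str = part.get("text", "")
        if len(_content_str) == 0:
            continue  # e.g. a bare functionCall part: leave both as None
        if part.get("thought") is True:
            reasoning = (reasoning or "") + _content_str
        else:
            content = (content or "") + _content_str
    return content, reasoning


# Mirrors the two tests in this commit:
assert split_parts([{"functionCall": {"name": "get_current_weather"}}]) == (None, None)
assert split_parts(
    [{"text": "Hello world"}, {"thought": True, "text": "I'm thinking..."}]
) == ("Hello world", "I'm thinking...")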


@@ -4979,35 +4979,6 @@
         "supports_tool_choice": true
     },
-    "gemini-2.5-pro-exp-03-25": {
-        "max_tokens": 65536,
-        "max_input_tokens": 1048576,
-        "max_output_tokens": 65536,
-        "max_images_per_prompt": 3000,
-        "max_videos_per_prompt": 10,
-        "max_video_length": 1,
-        "max_audio_length_hours": 8.4,
-        "max_audio_per_prompt": 1,
-        "max_pdf_size_mb": 30,
-        "input_cost_per_token": 0,
-        "input_cost_per_token_above_200k_tokens": 0,
-        "output_cost_per_token": 0,
-        "output_cost_per_token_above_200k_tokens": 0,
-        "litellm_provider": "vertex_ai-language-models",
-        "mode": "chat",
-        "supports_system_messages": true,
-        "supports_function_calling": true,
-        "supports_vision": true,
-        "supports_audio_input": true,
-        "supports_video_input": true,
-        "supports_pdf_input": true,
-        "supports_response_schema": true,
-        "supports_tool_choice": true,
-        "supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
-        "supported_modalities": ["text", "image", "audio", "video"],
-        "supported_output_modalities": ["text"],
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
-    },
     "gemini-2.5-pro-preview-03-25": {
         "max_tokens": 65536,
         "max_input_tokens": 1048576,
         "max_output_tokens": 65536,


@@ -26,6 +26,47 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
 from litellm.types.utils import Usage
 
 
+def test_reasoning_tokens_no_price_set():
+    model = "o1-mini"
+    custom_llm_provider = "openai"
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model_cost_map = litellm.model_cost[model]
+    usage = Usage(
+        completion_tokens=1578,
+        prompt_tokens=17,
+        total_tokens=1595,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=952,
+            rejected_prediction_tokens=None,
+            text_tokens=626,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None
+        ),
+    )
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    assert round(prompt_cost, 10) == round(
+        model_cost_map["input_cost_per_token"] * usage.prompt_tokens,
+        10,
+    )
+    print(f"completion_cost: {completion_cost}")
+    expected_completion_cost = (
+        model_cost_map["output_cost_per_token"] * usage.completion_tokens
+    )
+    print(f"expected_completion_cost: {expected_completion_cost}")
+    assert round(completion_cost, 10) == round(
+        expected_completion_cost,
+        10,
+    )
+
+
 def test_reasoning_tokens_gemini():
     model = "gemini-2.5-flash-preview-04-17"
     custom_llm_provider = "gemini"


@@ -239,3 +239,23 @@ def test_vertex_ai_thinking_output_part():
     content, reasoning_content = v.get_assistant_content_message(parts=parts)
     assert content == "Hello world"
     assert reasoning_content == "I'm thinking..."
+
+
+def test_vertex_ai_empty_content():
+    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+        VertexGeminiConfig,
+    )
+    from litellm.types.llms.vertex_ai import HttpxPartType
+
+    v = VertexGeminiConfig()
+    parts = [
+        HttpxPartType(
+            functionCall={
+                "name": "get_current_weather",
+                "arguments": "{}",
+            },
+        ),
+    ]
+    content, reasoning_content = v.get_assistant_content_message(parts=parts)
+    assert content is None
+    assert reasoning_content is None