Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 02:34:29 +00:00)
test(utils.py): handle scenario where text tokens + reasoning tokens … (#10165)
* test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately. Addresses https://github.com/BerriAI/litellm/pull/10141#discussion_r2051555332
* fix(vertex_and_google_ai_studio.py): only set content if non-empty str
parent df57965f8f
commit 518a4900f6
5 changed files with 90 additions and 49 deletions
@@ -265,9 +265,10 @@ def generic_cost_per_token(
     )
 
     ## CALCULATE OUTPUT COST
-    text_tokens = usage.completion_tokens
+    text_tokens = 0
     audio_tokens = 0
     reasoning_tokens = 0
+    is_text_tokens_total = False
     if usage.completion_tokens_details is not None:
         audio_tokens = (
             cast(
@@ -281,7 +282,7 @@ def generic_cost_per_token(
                 Optional[int],
                 getattr(usage.completion_tokens_details, "text_tokens", None),
             )
-            or usage.completion_tokens  # default to completion tokens, if this field is not set
+            or 0  # default to completion tokens, if this field is not set
         )
         reasoning_tokens = (
             cast(
@@ -290,6 +291,11 @@ def generic_cost_per_token(
             )
             or 0
         )
 
+    if text_tokens == 0:
+        text_tokens = usage.completion_tokens
+    if text_tokens == usage.completion_tokens:
+        is_text_tokens_total = True
+
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
@@ -302,19 +308,21 @@ def generic_cost_per_token(
     )
 
     ## AUDIO COST
-    if (
-        _output_cost_per_audio_token is not None
-        and audio_tokens is not None
-        and audio_tokens > 0
-    ):
+    if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
+        _output_cost_per_audio_token = (
+            _output_cost_per_audio_token
+            if _output_cost_per_audio_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(audio_tokens) * _output_cost_per_audio_token
 
     ## REASONING COST
-    if (
-        _output_cost_per_reasoning_token is not None
-        and reasoning_tokens
-        and reasoning_tokens > 0
-    ):
+    if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
+        _output_cost_per_reasoning_token = (
+            _output_cost_per_reasoning_token
+            if _output_cost_per_reasoning_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token
 
     return prompt_cost, completion_cost
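Taken together, these hunks change the billing rule: the detail token buckets are only charged on top of the text charge when the reported text_tokens do not already cover the whole completion_tokens count, and a missing audio/reasoning price now falls back to the base output price instead of skipping the charge. A minimal standalone sketch of that rule, with simplified names and an illustrative price (not litellm's actual function signature):

from typing import Optional


def output_cost(
    completion_tokens: int,
    text_tokens: Optional[int],        # from completion_tokens_details, may be unset
    reasoning_tokens: Optional[int],   # from completion_tokens_details, may be unset
    base_price: float,                 # output_cost_per_token
    reasoning_price: Optional[float],  # output_cost_per_reasoning_token, if any
) -> float:
    text = text_tokens or 0
    if text == 0:
        text = completion_tokens  # no breakdown reported: bill everything as text
    # If the text count already covers every completion token, charging the
    # reasoning bucket on top would bill those tokens twice.
    is_text_tokens_total = text == completion_tokens

    cost = text * base_price
    if not is_text_tokens_total and reasoning_tokens:
        # Use the model's reasoning price if it defines one, else the base price.
        price = reasoning_price if reasoning_price is not None else base_price
        cost += reasoning_tokens * price
    return cost


# o1-mini-style usage: 626 text + 952 reasoning = 1578 completion tokens with no
# separate reasoning price collapses to a flat completion_tokens * base_price.
assert round(output_cost(1578, 626, 952, 2e-6, None), 10) == round(1578 * 2e-6, 10)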
@@ -587,14 +587,15 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 _content_str += "data:{};base64,{}".format(
                     part["inlineData"]["mimeType"], part["inlineData"]["data"]
                 )
-            if part.get("thought") is True:
-                if reasoning_content_str is None:
-                    reasoning_content_str = ""
-                reasoning_content_str += _content_str
-            else:
-                if content_str is None:
-                    content_str = ""
-                content_str += _content_str
+            if len(_content_str) > 0:
+                if part.get("thought") is True:
+                    if reasoning_content_str is None:
+                        reasoning_content_str = ""
+                    reasoning_content_str += _content_str
+                else:
+                    if content_str is None:
+                        content_str = ""
+                    content_str += _content_str
 
         return content_str, reasoning_content_str
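The guard added here matters for tool-call-only responses: a part that carries only a functionCall yields an empty _content_str, and appending it unconditionally turned content_str from None into "". A small sketch of the folding rule with the new check (hypothetical helper, not the actual VertexGeminiConfig method):

from typing import Optional, Tuple


def fold_part(
    part_text: str,
    is_thought: bool,
    content: Optional[str],
    reasoning: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    if len(part_text) > 0:  # the new non-empty guard
        if is_thought:
            reasoning = (reasoning or "") + part_text
        else:
            content = (content or "") + part_text
    return content, reasoning


# A functionCall-only part contributes no text, so both fields stay None,
# which is exactly what test_vertex_ai_empty_content below asserts.
assert fold_part("", is_thought=False, content=None, reasoning=None) == (None, None)
assert fold_part("Hi", is_thought=True, content=None, reasoning=None) == (None, "Hi")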
@@ -4979,35 +4979,6 @@
         "supports_tool_choice": true
     },
-    "gemini-2.5-pro-exp-03-25": {
-        "max_tokens": 65536,
-        "max_input_tokens": 1048576,
-        "max_output_tokens": 65536,
-        "max_images_per_prompt": 3000,
-        "max_videos_per_prompt": 10,
-        "max_video_length": 1,
-        "max_audio_length_hours": 8.4,
-        "max_audio_per_prompt": 1,
-        "max_pdf_size_mb": 30,
-        "input_cost_per_token": 0,
-        "input_cost_per_token_above_200k_tokens": 0,
-        "output_cost_per_token": 0,
-        "output_cost_per_token_above_200k_tokens": 0,
-        "litellm_provider": "vertex_ai-language-models",
-        "mode": "chat",
-        "supports_system_messages": true,
-        "supports_function_calling": true,
-        "supports_vision": true,
-        "supports_audio_input": true,
-        "supports_video_input": true,
-        "supports_pdf_input": true,
-        "supports_response_schema": true,
-        "supports_tool_choice": true,
-        "supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
-        "supported_modalities": ["text", "image", "audio", "video"],
-        "supported_output_modalities": ["text"],
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
-    },
     "gemini-2.5-pro-preview-03-25": {
         "max_tokens": 65536,
         "max_input_tokens": 1048576,
         "max_output_tokens": 65536,
@@ -26,6 +26,47 @@ from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
 from litellm.types.utils import Usage
 
 
+def test_reasoning_tokens_no_price_set():
+    model = "o1-mini"
+    custom_llm_provider = "openai"
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model_cost_map = litellm.model_cost[model]
+    usage = Usage(
+        completion_tokens=1578,
+        prompt_tokens=17,
+        total_tokens=1595,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=952,
+            rejected_prediction_tokens=None,
+            text_tokens=626,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None
+        ),
+    )
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    assert round(prompt_cost, 10) == round(
+        model_cost_map["input_cost_per_token"] * usage.prompt_tokens,
+        10,
+    )
+    print(f"completion_cost: {completion_cost}")
+    expected_completion_cost = (
+        model_cost_map["output_cost_per_token"] * usage.completion_tokens
+    )
+    print(f"expected_completion_cost: {expected_completion_cost}")
+    assert round(completion_cost, 10) == round(
+        expected_completion_cost,
+        10,
+    )
+
+
 def test_reasoning_tokens_gemini():
     model = "gemini-2.5-flash-preview-04-17"
     custom_llm_provider = "gemini"
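Worked numbers behind the new test's expectation, using an assumed price of $4.40 per 1M output tokens (the test reads the real figure from litellm.model_cost, so treat this value as illustrative):

price = 4.40 / 1_000_000  # assumed output_cost_per_token

text_tokens = 626
reasoning_tokens = 952
completion_tokens = text_tokens + reasoning_tokens  # 1578

# With no separate reasoning price set, reasoning tokens fall back to the base
# output price, so the split billing equals a flat completion_tokens * price.
split = text_tokens * price + reasoning_tokens * price
flat = completion_tokens * price
assert round(split, 10) == round(flat, 10)  # both 0.0069432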
@@ -239,3 +239,23 @@ def test_vertex_ai_thinking_output_part():
     content, reasoning_content = v.get_assistant_content_message(parts=parts)
     assert content == "Hello world"
     assert reasoning_content == "I'm thinking..."
+
+
+def test_vertex_ai_empty_content():
+    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+        VertexGeminiConfig,
+    )
+    from litellm.types.llms.vertex_ai import HttpxPartType
+
+    v = VertexGeminiConfig()
+    parts = [
+        HttpxPartType(
+            functionCall={
+                "name": "get_current_weather",
+                "arguments": "{}",
+            },
+        ),
+    ]
+    content, reasoning_content = v.get_assistant_content_message(parts=parts)
+    assert content is None
+    assert reasoning_content is None