Cost tracking for gemini-2.5-pro (#9837)

* build(model_prices_and_context_window.json): add google/gemini-2.0-flash-lite-001 versioned pricing

Closes https://github.com/BerriAI/litellm/issues/9829

* build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param

* build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param

* build(model_prices_and_context_window.json): add supported endpoints to gemini-2.5-pro

* build(model_prices_and_context_window.json): add gemini 200k+ pricing

* feat(utils.py): support cost calculation for gemini-2.5-pro above 200k tokens

Fixes https://github.com/BerriAI/litellm/issues/9807

* build: test dockerfile change

* build: revert apk change

* ci(config.yml): pip install wheel

* ci: test problematic package first

* ci(config.yml): pip install only binary

* ci: try more things

* ci: test different ml_dtypes version

* ci(config.yml): check ml_dtypes==0.4.0

* ci: test

* ci: cleanup config.yml

* ci: specify ml dtypes in requirements.txt

* ci: remove redisvl dependency (temporary)

* fix: fix linting errors

* test: update test

* test: fix test
This commit is contained in:
Krish Dholakia 2025-04-09 18:48:43 -07:00 committed by GitHub
parent 4c1bb74c3d
commit ac4f32fb1e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 251 additions and 91 deletions

View file

@ -1127,6 +1127,7 @@ jobs:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install wheel setuptools
python -m pip install -r requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"

View file

@ -90,35 +90,45 @@ def _generic_cost_per_character(
return prompt_cost, completion_cost
def _get_prompt_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
def _get_token_base_cost(model_info: ModelInfo, usage: Usage) -> Tuple[float, float]:
"""
Return prompt cost for a given model and usage.
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
If input_tokens > threshold and `input_cost_per_token_above_[x]k_tokens` or `input_cost_per_token_above_[x]_tokens` is set,
then we use the corresponding threshold cost.
"""
input_cost_per_token_above_128k_tokens = model_info.get(
"input_cost_per_token_above_128k_tokens"
)
if _is_above_128k(usage.prompt_tokens) and input_cost_per_token_above_128k_tokens:
return input_cost_per_token_above_128k_tokens
return model_info["input_cost_per_token"]
prompt_base_cost = model_info["input_cost_per_token"]
completion_base_cost = model_info["output_cost_per_token"]
## CHECK IF ABOVE THRESHOLD
threshold: Optional[float] = None
for key, value in sorted(model_info.items(), reverse=True):
if key.startswith("input_cost_per_token_above_") and value is not None:
try:
# Handle both formats: _above_128k_tokens and _above_128_tokens
threshold_str = key.split("_above_")[1].split("_tokens")[0]
threshold = float(threshold_str.replace("k", "")) * (
1000 if "k" in threshold_str else 1
)
if usage.prompt_tokens > threshold:
prompt_base_cost = cast(
float,
model_info.get(key, prompt_base_cost),
)
completion_base_cost = cast(
float,
model_info.get(
f"output_cost_per_token_above_{threshold_str}_tokens",
completion_base_cost,
),
)
break
except (IndexError, ValueError):
continue
except Exception:
continue
def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
"""
Return prompt cost for a given model and usage.
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
"""
output_cost_per_token_above_128k_tokens = model_info.get(
"output_cost_per_token_above_128k_tokens"
)
if (
_is_above_128k(usage.completion_tokens)
and output_cost_per_token_above_128k_tokens
):
return output_cost_per_token_above_128k_tokens
return model_info["output_cost_per_token"]
return prompt_base_cost, completion_base_cost
def calculate_cost_component(
@ -215,7 +225,9 @@ def generic_cost_per_token(
if text_tokens == 0:
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
prompt_base_cost, completion_base_cost = _get_token_base_cost(
model_info=model_info, usage=usage
)
prompt_cost = float(text_tokens) * prompt_base_cost
@ -253,9 +265,6 @@ def generic_cost_per_token(
)
## CALCULATE OUTPUT COST
completion_base_cost = _get_completion_token_base_cost(
model_info=model_info, usage=usage
)
text_tokens = usage.completion_tokens
audio_tokens = 0
if usage.completion_tokens_details is not None:

View file

@ -380,6 +380,7 @@
"supports_tool_choice": true,
"supports_native_streaming": false,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
},
"o1-pro-2025-03-19": {
@ -401,6 +402,7 @@
"supports_tool_choice": true,
"supports_native_streaming": false,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
},
"o1": {
@ -4512,20 +4514,10 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -4536,6 +4528,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": {
@ -4548,20 +4543,10 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -4572,6 +4557,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-flash-exp": {
@ -4605,6 +4593,8 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supports_tool_choice": true
},
@ -4629,6 +4619,8 @@
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-flash-thinking-exp": {
@ -4662,6 +4654,8 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4696,6 +4690,8 @@
"supports_vision": true,
"supports_response_schema": false,
"supports_audio_output": false,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4721,6 +4717,7 @@
"supports_audio_output": true,
"supports_audio_input": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
@ -4743,6 +4740,32 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
"gemini-2.0-flash-lite-001": {
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 50,
"input_cost_per_audio_token": 0.000000075,
"input_cost_per_token": 0.000000075,
"output_cost_per_token": 0.0000003,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4808,6 +4831,7 @@
"supports_audio_output": true,
"supports_audio_input": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
@ -4833,6 +4857,8 @@
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
},
"gemini/gemini-2.0-flash-001": {
@ -4858,6 +4884,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini/gemini-2.5-pro-preview-03-25": {
@ -4872,9 +4900,9 @@
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.0000025,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.0000010,
"output_cost_per_token_above_128k_tokens": 0.000015,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
@ -4885,6 +4913,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
},
"gemini/gemini-2.0-flash-exp": {
@ -4920,6 +4950,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4946,6 +4978,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
},
"gemini/gemini-2.0-flash-thinking-exp": {
@ -4981,6 +5015,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -5017,6 +5053,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},

View file

@ -29,10 +29,14 @@ model_list:
model: databricks/databricks-claude-3-7-sonnet
api_key: os.environ/DATABRICKS_API_KEY
api_base: os.environ/DATABRICKS_API_BASE
- model_name: "gemini/gemini-2.0-flash"
- model_name: "llmaas-meta/llama-3.1-8b-instruct"
litellm_params:
model: gemini/gemini-2.0-flash
api_key: os.environ/GEMINI_API_KEY
model: nvidia_nim/meta/llama-3.3-70b-instruct
api_key: "invalid"
api_base: "http://0.0.0.0:8090"
model_info:
input_cost_per_token: "100"
output_cost_per_token: "100"
litellm_settings:
num_retries: 0

View file

@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
input_cost_per_character: Optional[float] # only for vertex ai models
input_cost_per_audio_token: Optional[float]
input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models
input_cost_per_token_above_200k_tokens: Optional[
float
] # only for vertex ai gemini-2.5-pro models
input_cost_per_character_above_128k_tokens: Optional[
float
] # only for vertex ai models
@ -136,6 +139,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
output_cost_per_token_above_128k_tokens: Optional[
float
] # only for vertex ai models
output_cost_per_token_above_200k_tokens: Optional[
float
] # only for vertex ai gemini-2.5-pro models
output_cost_per_character_above_128k_tokens: Optional[
float
] # only for vertex ai models

View file

@ -4532,6 +4532,9 @@ def _get_model_info_helper( # noqa: PLR0915
input_cost_per_token_above_128k_tokens=_model_info.get(
"input_cost_per_token_above_128k_tokens", None
),
input_cost_per_token_above_200k_tokens=_model_info.get(
"input_cost_per_token_above_200k_tokens", None
),
input_cost_per_query=_model_info.get("input_cost_per_query", None),
input_cost_per_second=_model_info.get("input_cost_per_second", None),
input_cost_per_audio_token=_model_info.get(
@ -4556,6 +4559,9 @@ def _get_model_info_helper( # noqa: PLR0915
output_cost_per_character_above_128k_tokens=_model_info.get(
"output_cost_per_character_above_128k_tokens", None
),
output_cost_per_token_above_200k_tokens=_model_info.get(
"output_cost_per_token_above_200k_tokens", None
),
output_cost_per_second=_model_info.get("output_cost_per_second", None),
output_cost_per_image=_model_info.get("output_cost_per_image", None),
output_vector_size=_model_info.get("output_vector_size", None),

View file

@ -380,6 +380,7 @@
"supports_tool_choice": true,
"supports_native_streaming": false,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
},
"o1-pro-2025-03-19": {
@ -401,6 +402,7 @@
"supports_tool_choice": true,
"supports_native_streaming": false,
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supported_endpoints": ["/v1/responses", "/v1/batch"]
},
"o1": {
@ -4512,20 +4514,10 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -4536,6 +4528,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-pro-exp-02-05": {
@ -4548,20 +4543,10 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -4572,6 +4557,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-flash-exp": {
@ -4605,6 +4593,8 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supports_tool_choice": true
},
@ -4629,6 +4619,8 @@
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini-2.0-flash-thinking-exp": {
@ -4662,6 +4654,8 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4696,6 +4690,8 @@
"supports_vision": true,
"supports_response_schema": false,
"supports_audio_output": false,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4721,6 +4717,7 @@
"supports_audio_output": true,
"supports_audio_input": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
@ -4743,6 +4740,32 @@
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
"gemini-2.0-flash-lite-001": {
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 50,
"input_cost_per_audio_token": 0.000000075,
"input_cost_per_token": 0.000000075,
"output_cost_per_token": 0.0000003,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4808,6 +4831,7 @@
"supports_audio_output": true,
"supports_audio_input": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
@ -4833,6 +4857,8 @@
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
},
"gemini/gemini-2.0-flash-001": {
@ -4858,6 +4884,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini/gemini-2.5-pro-preview-03-25": {
@ -4872,9 +4900,9 @@
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.0000025,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.0000010,
"output_cost_per_token_above_128k_tokens": 0.000015,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
@ -4885,6 +4913,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
},
"gemini/gemini-2.0-flash-exp": {
@ -4920,6 +4950,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -4946,6 +4978,8 @@
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
},
"gemini/gemini-2.0-flash-thinking-exp": {
@ -4981,6 +5015,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
@ -5017,6 +5053,8 @@
"supports_audio_output": true,
"tpm": 4000000,
"rpm": 10,
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text", "image"],
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},

View file

@ -10,7 +10,6 @@ gunicorn==23.0.0 # server dep
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.2.1 # redis caching
redisvl==0.4.1 # semantic caching
prisma==0.11.0 # for db
mangum==0.17.0 # for aws lambda functions
pynacl==1.5.0 # for encrypting keys

View file

@ -0,0 +1,50 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
import litellm
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
StandardBuiltInToolCostTracking,
)
from litellm.types.llms.openai import FileSearchTool, WebSearchOptions
from litellm.types.utils import ModelInfo, ModelResponse, StandardBuiltInToolsParams
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import Usage
def test_generic_cost_per_token_above_200k_tokens():
    """Costs should switch to the `*_above_200k_tokens` rates once prompt usage crosses the threshold.

    Loads the bundled local model-cost map, builds a Usage whose prompt token
    count is far above 200k, and checks that both the prompt and completion
    costs returned by ``generic_cost_per_token`` match the above-200k pricing
    fields for the model.
    """
    model = "gemini-2.5-pro-exp-03-25"
    custom_llm_provider = "vertex_ai"

    # Force litellm to read pricing from its bundled local cost map
    # rather than fetching the remote one.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    pricing = litellm.model_cost[model]

    # 220M prompt tokens — far beyond the 200k threshold.
    n_prompt = 220 * 1e6
    n_completion = 150
    usage = Usage(
        prompt_tokens=n_prompt,
        completion_tokens=n_completion,
        total_tokens=n_prompt + n_completion,
    )

    prompt_cost, completion_cost = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider=custom_llm_provider,
    )

    expected_prompt_cost = (
        pricing["input_cost_per_token_above_200k_tokens"] * usage.prompt_tokens
    )
    expected_completion_cost = (
        pricing["output_cost_per_token_above_200k_tokens"] * usage.completion_tokens
    )
    assert round(prompt_cost, 10) == round(expected_prompt_cost, 10)
    assert round(completion_cost, 10) == round(expected_completion_cost, 10)

View file

@ -431,6 +431,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"input_cost_per_character_above_128k_tokens": {"type": "number"},
"input_cost_per_image": {"type": "number"},
"input_cost_per_image_above_128k_tokens": {"type": "number"},
"input_cost_per_token_above_200k_tokens": {"type": "number"},
"input_cost_per_pixel": {"type": "number"},
"input_cost_per_query": {"type": "number"},
"input_cost_per_request": {"type": "number"},
@ -483,6 +484,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"output_cost_per_second": {"type": "number"},
"output_cost_per_token": {"type": "number"},
"output_cost_per_token_above_128k_tokens": {"type": "number"},
"output_cost_per_token_above_200k_tokens": {"type": "number"},
"output_cost_per_token_batches": {"type": "number"},
"output_db_cost_per_token": {"type": "number"},
"output_dbu_cost_per_token": {"type": "number"},
@ -541,6 +543,13 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"enum": ["text", "audio", "image", "video"],
},
},
"supported_output_modalities": {
"type": "array",
"items": {
"type": "string",
"enum": ["text", "image"],
},
},
"supports_native_streaming": {"type": "boolean"},
},
"additionalProperties": False,