mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
Cost tracking for gemini-2.5-pro
(#9837)
* build(model_prices_and_context_window.json): add google/gemini-2.0-flash-lite-001 versioned pricing Closes https://github.com/BerriAI/litellm/issues/9829 * build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param * build(model_prices_and_context_window.json): add initial support for 'supported_output_modalities' param * build(model_prices_and_context_window.json): add supported endpoints to gemini-2.5-pro * build(model_prices_and_context_window.json): add gemini 200k+ pricing * feat(utils.py): support cost calculation for gemini-2.5-pro above 200k tokens Fixes https://github.com/BerriAI/litellm/issues/9807 * build: test dockerfile change * build: revert apk change * ci(config.yml): pip install wheel * ci: test problematic package first * ci(config.yml): pip install only binary * ci: try more things * ci: test different ml_dtypes version * ci(config.yml): check ml_dtypes==0.4.0 * ci: test * ci: cleanup config.yml * ci: specify ml dtypes in requirements.txt * ci: remove redisvl dependency (temporary) * fix: fix linting errors * test: update test * test: fix test
This commit is contained in:
parent
4c1bb74c3d
commit
ac4f32fb1e
10 changed files with 251 additions and 91 deletions
|
@ -1127,6 +1127,7 @@ jobs:
|
|||
name: Install Dependencies
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install wheel setuptools
|
||||
python -m pip install -r requirements.txt
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-retry==1.6.3"
|
||||
|
|
|
@ -90,35 +90,45 @@ def _generic_cost_per_character(
|
|||
return prompt_cost, completion_cost
|
||||
|
||||
|
||||
def _get_prompt_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
|
||||
def _get_token_base_cost(model_info: ModelInfo, usage: Usage) -> Tuple[float, float]:
|
||||
"""
|
||||
Return prompt cost for a given model and usage.
|
||||
|
||||
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
|
||||
If input_tokens > threshold and `input_cost_per_token_above_[x]k_tokens` or `input_cost_per_token_above_[x]_tokens` is set,
|
||||
then we use the corresponding threshold cost.
|
||||
"""
|
||||
input_cost_per_token_above_128k_tokens = model_info.get(
|
||||
"input_cost_per_token_above_128k_tokens"
|
||||
)
|
||||
if _is_above_128k(usage.prompt_tokens) and input_cost_per_token_above_128k_tokens:
|
||||
return input_cost_per_token_above_128k_tokens
|
||||
return model_info["input_cost_per_token"]
|
||||
prompt_base_cost = model_info["input_cost_per_token"]
|
||||
completion_base_cost = model_info["output_cost_per_token"]
|
||||
|
||||
## CHECK IF ABOVE THRESHOLD
|
||||
threshold: Optional[float] = None
|
||||
for key, value in sorted(model_info.items(), reverse=True):
|
||||
if key.startswith("input_cost_per_token_above_") and value is not None:
|
||||
try:
|
||||
# Handle both formats: _above_128k_tokens and _above_128_tokens
|
||||
threshold_str = key.split("_above_")[1].split("_tokens")[0]
|
||||
threshold = float(threshold_str.replace("k", "")) * (
|
||||
1000 if "k" in threshold_str else 1
|
||||
)
|
||||
if usage.prompt_tokens > threshold:
|
||||
prompt_base_cost = cast(
|
||||
float,
|
||||
model_info.get(key, prompt_base_cost),
|
||||
)
|
||||
completion_base_cost = cast(
|
||||
float,
|
||||
model_info.get(
|
||||
f"output_cost_per_token_above_{threshold_str}_tokens",
|
||||
completion_base_cost,
|
||||
),
|
||||
)
|
||||
break
|
||||
except (IndexError, ValueError):
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
def _get_completion_token_base_cost(model_info: ModelInfo, usage: Usage) -> float:
|
||||
"""
|
||||
Return prompt cost for a given model and usage.
|
||||
|
||||
If input_tokens > 128k and `input_cost_per_token_above_128k_tokens` is set, then we use the `input_cost_per_token_above_128k_tokens` field.
|
||||
"""
|
||||
output_cost_per_token_above_128k_tokens = model_info.get(
|
||||
"output_cost_per_token_above_128k_tokens"
|
||||
)
|
||||
if (
|
||||
_is_above_128k(usage.completion_tokens)
|
||||
and output_cost_per_token_above_128k_tokens
|
||||
):
|
||||
return output_cost_per_token_above_128k_tokens
|
||||
return model_info["output_cost_per_token"]
|
||||
return prompt_base_cost, completion_base_cost
|
||||
|
||||
|
||||
def calculate_cost_component(
|
||||
|
@ -215,7 +225,9 @@ def generic_cost_per_token(
|
|||
if text_tokens == 0:
|
||||
text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens
|
||||
|
||||
prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)
|
||||
prompt_base_cost, completion_base_cost = _get_token_base_cost(
|
||||
model_info=model_info, usage=usage
|
||||
)
|
||||
|
||||
prompt_cost = float(text_tokens) * prompt_base_cost
|
||||
|
||||
|
@ -253,9 +265,6 @@ def generic_cost_per_token(
|
|||
)
|
||||
|
||||
## CALCULATE OUTPUT COST
|
||||
completion_base_cost = _get_completion_token_base_cost(
|
||||
model_info=model_info, usage=usage
|
||||
)
|
||||
text_tokens = usage.completion_tokens
|
||||
audio_tokens = 0
|
||||
if usage.completion_tokens_details is not None:
|
||||
|
|
|
@ -380,6 +380,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1-pro-2025-03-19": {
|
||||
|
@ -401,6 +402,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1": {
|
||||
|
@ -4512,20 +4514,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4536,6 +4528,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-pro-exp-02-05": {
|
||||
|
@ -4548,20 +4543,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4572,6 +4557,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
|
@ -4605,6 +4593,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4629,6 +4619,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4662,6 +4654,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4696,6 +4690,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": false,
|
||||
"supports_audio_output": false,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4721,6 +4717,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4743,6 +4740,32 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini-2.0-flash-lite-001": {
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 50,
|
||||
"input_cost_per_audio_token": 0.000000075,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4808,6 +4831,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4833,6 +4857,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-001": {
|
||||
|
@ -4858,6 +4884,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini/gemini-2.5-pro-preview-03-25": {
|
||||
|
@ -4872,9 +4900,9 @@
|
|||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.0000010,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000015,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
|
@ -4885,6 +4913,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-exp": {
|
||||
|
@ -4920,6 +4950,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4946,6 +4978,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4981,6 +5015,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -5017,6 +5053,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
|
|
@ -29,10 +29,14 @@ model_list:
|
|||
model: databricks/databricks-claude-3-7-sonnet
|
||||
api_key: os.environ/DATABRICKS_API_KEY
|
||||
api_base: os.environ/DATABRICKS_API_BASE
|
||||
- model_name: "gemini/gemini-2.0-flash"
|
||||
- model_name: "llmaas-meta/llama-3.1-8b-instruct"
|
||||
litellm_params:
|
||||
model: gemini/gemini-2.0-flash
|
||||
api_key: os.environ/GEMINI_API_KEY
|
||||
model: nvidia_nim/meta/llama-3.3-70b-instruct
|
||||
api_key: "invalid"
|
||||
api_base: "http://0.0.0.0:8090"
|
||||
model_info:
|
||||
input_cost_per_token: "100"
|
||||
output_cost_per_token: "100"
|
||||
|
||||
litellm_settings:
|
||||
num_retries: 0
|
||||
|
|
|
@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
|||
input_cost_per_character: Optional[float] # only for vertex ai models
|
||||
input_cost_per_audio_token: Optional[float]
|
||||
input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models
|
||||
input_cost_per_token_above_200k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai gemini-2.5-pro models
|
||||
input_cost_per_character_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
|
@ -136,6 +139,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
|||
output_cost_per_token_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
output_cost_per_token_above_200k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai gemini-2.5-pro models
|
||||
output_cost_per_character_above_128k_tokens: Optional[
|
||||
float
|
||||
] # only for vertex ai models
|
||||
|
|
|
@ -4532,6 +4532,9 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
input_cost_per_token_above_128k_tokens=_model_info.get(
|
||||
"input_cost_per_token_above_128k_tokens", None
|
||||
),
|
||||
input_cost_per_token_above_200k_tokens=_model_info.get(
|
||||
"input_cost_per_token_above_200k_tokens", None
|
||||
),
|
||||
input_cost_per_query=_model_info.get("input_cost_per_query", None),
|
||||
input_cost_per_second=_model_info.get("input_cost_per_second", None),
|
||||
input_cost_per_audio_token=_model_info.get(
|
||||
|
@ -4556,6 +4559,9 @@ def _get_model_info_helper( # noqa: PLR0915
|
|||
output_cost_per_character_above_128k_tokens=_model_info.get(
|
||||
"output_cost_per_character_above_128k_tokens", None
|
||||
),
|
||||
output_cost_per_token_above_200k_tokens=_model_info.get(
|
||||
"output_cost_per_token_above_200k_tokens", None
|
||||
),
|
||||
output_cost_per_second=_model_info.get("output_cost_per_second", None),
|
||||
output_cost_per_image=_model_info.get("output_cost_per_image", None),
|
||||
output_vector_size=_model_info.get("output_vector_size", None),
|
||||
|
|
|
@ -380,6 +380,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1-pro-2025-03-19": {
|
||||
|
@ -401,6 +402,7 @@
|
|||
"supports_tool_choice": true,
|
||||
"supports_native_streaming": false,
|
||||
"supported_modalities": ["text", "image"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"supported_endpoints": ["/v1/responses", "/v1/batch"]
|
||||
},
|
||||
"o1": {
|
||||
|
@ -4512,20 +4514,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4536,6 +4528,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-pro-exp-02-05": {
|
||||
|
@ -4548,20 +4543,10 @@
|
|||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_image": 0,
|
||||
"input_cost_per_video_per_second": 0,
|
||||
"input_cost_per_audio_per_second": 0,
|
||||
"input_cost_per_token": 0,
|
||||
"input_cost_per_character": 0,
|
||||
"input_cost_per_token_above_128k_tokens": 0,
|
||||
"input_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_image_above_128k_tokens": 0,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0,
|
||||
"output_cost_per_token": 0,
|
||||
"output_cost_per_character": 0,
|
||||
"output_cost_per_token_above_128k_tokens": 0,
|
||||
"output_cost_per_character_above_128k_tokens": 0,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.00001,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -4572,6 +4557,9 @@
|
|||
"supports_pdf_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
|
@ -4605,6 +4593,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4629,6 +4619,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
|
||||
},
|
||||
"gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4662,6 +4654,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4696,6 +4690,8 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": false,
|
||||
"supports_audio_output": false,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4721,6 +4717,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4743,6 +4740,32 @@
|
|||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"gemini-2.0-flash-lite-001": {
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 8192,
|
||||
"max_images_per_prompt": 3000,
|
||||
"max_videos_per_prompt": 10,
|
||||
"max_video_length": 1,
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
"max_pdf_size_mb": 50,
|
||||
"input_cost_per_audio_token": 0.000000075,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4808,6 +4831,7 @@
|
|||
"supports_audio_output": true,
|
||||
"supports_audio_input": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"supports_tool_choice": true,
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
|
@ -4833,6 +4857,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": true,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-001": {
|
||||
|
@ -4858,6 +4884,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://ai.google.dev/pricing#2_0flash"
|
||||
},
|
||||
"gemini/gemini-2.5-pro-preview-03-25": {
|
||||
|
@ -4872,9 +4900,9 @@
|
|||
"max_pdf_size_mb": 30,
|
||||
"input_cost_per_audio_token": 0.0000007,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_token_above_200k_tokens": 0.0000025,
|
||||
"output_cost_per_token": 0.0000010,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000015,
|
||||
"output_cost_per_token_above_200k_tokens": 0.000015,
|
||||
"litellm_provider": "gemini",
|
||||
"mode": "chat",
|
||||
"rpm": 10000,
|
||||
|
@ -4885,6 +4913,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-exp": {
|
||||
|
@ -4920,6 +4950,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4946,6 +4978,8 @@
|
|||
"supports_response_schema": true,
|
||||
"supports_audio_output": false,
|
||||
"supports_tool_choice": true,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
|
||||
},
|
||||
"gemini/gemini-2.0-flash-thinking-exp": {
|
||||
|
@ -4981,6 +5015,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -5017,6 +5053,8 @@
|
|||
"supports_audio_output": true,
|
||||
"tpm": 4000000,
|
||||
"rpm": 10,
|
||||
"supported_modalities": ["text", "image", "audio", "video"],
|
||||
"supported_output_modalities": ["text", "image"],
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
|
|
@ -10,7 +10,6 @@ gunicorn==23.0.0 # server dep
|
|||
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
|
||||
boto3==1.34.34 # aws bedrock/sagemaker calls
|
||||
redis==5.2.1 # redis caching
|
||||
redisvl==0.4.1 # semantic caching
|
||||
prisma==0.11.0 # for db
|
||||
mangum==0.17.0 # for aws lambda functions
|
||||
pynacl==1.5.0 # for encrypting keys
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||
StandardBuiltInToolCostTracking,
|
||||
)
|
||||
from litellm.types.llms.openai import FileSearchTool, WebSearchOptions
|
||||
from litellm.types.utils import ModelInfo, ModelResponse, StandardBuiltInToolsParams
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
|
||||
from litellm.types.utils import Usage
|
||||
|
||||
|
||||
def test_generic_cost_per_token_above_200k_tokens():
|
||||
model = "gemini-2.5-pro-exp-03-25"
|
||||
custom_llm_provider = "vertex_ai"
|
||||
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
|
||||
litellm.model_cost = litellm.get_model_cost_map(url="")
|
||||
|
||||
model_cost_map = litellm.model_cost[model]
|
||||
prompt_tokens = 220 * 1e6
|
||||
completion_tokens = 150
|
||||
usage = Usage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
prompt_cost, completion_cost = generic_cost_per_token(
|
||||
model=model,
|
||||
usage=usage,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
assert round(prompt_cost, 10) == round(
|
||||
model_cost_map["input_cost_per_token_above_200k_tokens"] * usage.prompt_tokens,
|
||||
10,
|
||||
)
|
||||
assert round(completion_cost, 10) == round(
|
||||
model_cost_map["output_cost_per_token_above_200k_tokens"]
|
||||
* usage.completion_tokens,
|
||||
10,
|
||||
)
|
|
@ -431,6 +431,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
|||
"input_cost_per_character_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_image": {"type": "number"},
|
||||
"input_cost_per_image_above_128k_tokens": {"type": "number"},
|
||||
"input_cost_per_token_above_200k_tokens": {"type": "number"},
|
||||
"input_cost_per_pixel": {"type": "number"},
|
||||
"input_cost_per_query": {"type": "number"},
|
||||
"input_cost_per_request": {"type": "number"},
|
||||
|
@ -483,6 +484,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
|||
"output_cost_per_second": {"type": "number"},
|
||||
"output_cost_per_token": {"type": "number"},
|
||||
"output_cost_per_token_above_128k_tokens": {"type": "number"},
|
||||
"output_cost_per_token_above_200k_tokens": {"type": "number"},
|
||||
"output_cost_per_token_batches": {"type": "number"},
|
||||
"output_db_cost_per_token": {"type": "number"},
|
||||
"output_dbu_cost_per_token": {"type": "number"},
|
||||
|
@ -541,6 +543,13 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
|||
"enum": ["text", "audio", "image", "video"],
|
||||
},
|
||||
},
|
||||
"supported_output_modalities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["text", "image"],
|
||||
},
|
||||
},
|
||||
"supports_native_streaming": {"type": "boolean"},
|
||||
},
|
||||
"additionalProperties": False,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue