def _get_token_base_cost(model_info: ModelInfo, usage: Usage) -> Tuple[float, float]:
    """
    Return the per-token base cost for the prompt and for the completion.

    The flat `input_cost_per_token` / `output_cost_per_token` rates apply unless the
    model declares tiered pricing through keys named
    `input_cost_per_token_above_[x]k_tokens` or `input_cost_per_token_above_[x]_tokens`.
    If `usage.prompt_tokens` is strictly greater than one or more of those thresholds,
    the rates of the numerically highest exceeded threshold are used.

    The tier is selected from the prompt size only. The completion rate for the
    selected tier is read from `output_cost_per_token_above_[x]_tokens` (same `[x]`);
    when that key is absent or set to None, the flat `output_cost_per_token` is kept.

    Args:
        model_info: Pricing entry for the model. Must contain `input_cost_per_token`
            and `output_cost_per_token`.
        usage: Usage of the request; only `prompt_tokens` is read.

    Returns:
        A `(prompt_base_cost, completion_base_cost)` tuple of per-token rates.

    Raises:
        KeyError: if `input_cost_per_token` or `output_cost_per_token` is missing.
    """
    prompt_base_cost = model_info["input_cost_per_token"]
    completion_base_cost = model_info["output_cost_per_token"]

    # A missing prompt token count is treated as 0, i.e. no tier can apply.
    prompt_tokens = usage.prompt_tokens or 0

    ## CHECK IF ABOVE THRESHOLD
    # Keep the highest threshold that the prompt exceeds. Thresholds are compared
    # numerically: ordering the key names as strings would rank "64k" above "128k".
    best_threshold: Optional[float] = None
    best_threshold_str = ""
    best_prompt_cost = prompt_base_cost
    for key, value in model_info.items():
        if not key.startswith("input_cost_per_token_above_") or value is None:
            continue
        try:
            # Handle both formats: _above_128k_tokens and _above_128_tokens
            threshold_str = key.split("_above_")[1].split("_tokens")[0]
            threshold = float(threshold_str.replace("k", "")) * (
                1000 if "k" in threshold_str else 1
            )
        except (IndexError, ValueError):
            # Key name does not carry a parseable threshold: ignore this tier.
            continue
        if prompt_tokens <= threshold:
            continue
        if best_threshold is None or threshold > best_threshold:
            best_threshold = threshold
            best_threshold_str = threshold_str
            best_prompt_cost = cast(float, value)

    if best_threshold is not None:
        prompt_base_cost = best_prompt_cost
        tier_completion_cost = model_info.get(
            f"output_cost_per_token_above_{best_threshold_str}_tokens"
        )
        # The key may exist with a None value when only the input tier is priced;
        # never hand None back to the caller as a rate.
        if tier_completion_cost is not None:
            completion_base_cost = cast(float, tier_completion_cost)

    return prompt_base_cost, completion_base_cost
"input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - "input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4536,6 +4528,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-pro-exp-02-05": { @@ -4548,20 +4543,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0, - "input_cost_per_video_per_second": 0, - "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - "input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + 
"output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4572,6 +4557,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-exp": { @@ -4605,6 +4593,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_tool_choice": true }, @@ -4629,6 +4619,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-thinking-exp": { @@ -4662,6 +4654,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4696,6 +4690,8 @@ "supports_vision": true, "supports_response_schema": false, "supports_audio_output": false, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4721,6 +4717,7 @@ 
"supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4743,6 +4740,32 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supports_tool_choice": true + }, + "gemini-2.0-flash-lite-001": { + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 50, + "input_cost_per_audio_token": 0.000000075, + "input_cost_per_token": 0.000000075, + "output_cost_per_token": 0.0000003, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4808,6 +4831,7 @@ "supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4833,6 +4857,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-001": { @@ -4858,6 +4884,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://ai.google.dev/pricing#2_0flash" }, "gemini/gemini-2.5-pro-preview-03-25": { @@ -4872,9 +4900,9 @@ "max_pdf_size_mb": 30, "input_cost_per_audio_token": 0.0000007, "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_token_above_200k_tokens": 0.0000025, "output_cost_per_token": 0.0000010, - "output_cost_per_token_above_128k_tokens": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -4885,6 +4913,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" }, "gemini/gemini-2.0-flash-exp": { @@ -4920,6 +4950,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4946,6 +4978,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-thinking-exp": { @@ -4981,6 +5015,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + 
"supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -5017,6 +5053,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 1d6f005587..511a8cde94 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -29,10 +29,14 @@ model_list: model: databricks/databricks-claude-3-7-sonnet api_key: os.environ/DATABRICKS_API_KEY api_base: os.environ/DATABRICKS_API_BASE - - model_name: "gemini/gemini-2.0-flash" + - model_name: "llmaas-meta/llama-3.1-8b-instruct" litellm_params: - model: gemini/gemini-2.0-flash - api_key: os.environ/GEMINI_API_KEY + model: nvidia_nim/meta/llama-3.3-70b-instruct + api_key: "invalid" + api_base: "http://0.0.0.0:8090" + model_info: + input_cost_per_token: "100" + output_cost_per_token: "100" litellm_settings: num_retries: 0 diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 6f0c26d301..3af5ca157e 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -120,6 +120,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): input_cost_per_character: Optional[float] # only for vertex ai models input_cost_per_audio_token: Optional[float] input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models + input_cost_per_token_above_200k_tokens: Optional[ + float + ] # only for vertex ai gemini-2.5-pro models input_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models @@ -136,6 +139,9 @@ class 
ModelInfoBase(ProviderSpecificModelInfo, total=False): output_cost_per_token_above_128k_tokens: Optional[ float ] # only for vertex ai models + output_cost_per_token_above_200k_tokens: Optional[ + float + ] # only for vertex ai gemini-2.5-pro models output_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models diff --git a/litellm/utils.py b/litellm/utils.py index f809d8a77b..9ebe0f4b09 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4532,6 +4532,9 @@ def _get_model_info_helper( # noqa: PLR0915 input_cost_per_token_above_128k_tokens=_model_info.get( "input_cost_per_token_above_128k_tokens", None ), + input_cost_per_token_above_200k_tokens=_model_info.get( + "input_cost_per_token_above_200k_tokens", None + ), input_cost_per_query=_model_info.get("input_cost_per_query", None), input_cost_per_second=_model_info.get("input_cost_per_second", None), input_cost_per_audio_token=_model_info.get( @@ -4556,6 +4559,9 @@ def _get_model_info_helper( # noqa: PLR0915 output_cost_per_character_above_128k_tokens=_model_info.get( "output_cost_per_character_above_128k_tokens", None ), + output_cost_per_token_above_200k_tokens=_model_info.get( + "output_cost_per_token_above_200k_tokens", None + ), output_cost_per_second=_model_info.get("output_cost_per_second", None), output_cost_per_image=_model_info.get("output_cost_per_image", None), output_vector_size=_model_info.get("output_vector_size", None), diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7e5be4dc6b..c67c3b85af 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -380,6 +380,7 @@ "supports_tool_choice": true, "supports_native_streaming": false, "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] }, "o1-pro-2025-03-19": { @@ -401,6 +402,7 @@ "supports_tool_choice": true, "supports_native_streaming": false, 
"supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supported_endpoints": ["/v1/responses", "/v1/batch"] }, "o1": { @@ -4512,20 +4514,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0, - "input_cost_per_video_per_second": 0, - "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - "input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4536,6 +4528,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-pro-exp-02-05": { @@ -4548,20 +4543,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0, - "input_cost_per_video_per_second": 0, - "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 0, - "input_cost_per_character": 0, - "input_cost_per_token_above_128k_tokens": 0, - "input_cost_per_character_above_128k_tokens": 0, - "input_cost_per_image_above_128k_tokens": 0, - 
"input_cost_per_video_per_second_above_128k_tokens": 0, - "input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 0, - "output_cost_per_character": 0, - "output_cost_per_token_above_128k_tokens": 0, - "output_cost_per_character_above_128k_tokens": 0, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -4572,6 +4557,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-exp": { @@ -4605,6 +4593,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_tool_choice": true }, @@ -4629,6 +4619,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" }, "gemini-2.0-flash-thinking-exp": { @@ -4662,6 +4654,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4696,6 +4690,8 @@ 
"supports_vision": true, "supports_response_schema": false, "supports_audio_output": false, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4721,6 +4717,7 @@ "supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4743,6 +4740,32 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supports_tool_choice": true + }, + "gemini-2.0-flash-lite-001": { + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 50, + "input_cost_per_audio_token": 0.000000075, + "input_cost_per_token": 0.000000075, + "output_cost_per_token": 0.0000003, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_audio_output": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4808,6 +4831,7 @@ "supports_audio_output": true, "supports_audio_input": true, "supported_modalities": ["text", "image", "audio", "video"], + 
"supported_output_modalities": ["text", "image"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, @@ -4833,6 +4857,8 @@ "supports_response_schema": true, "supports_audio_output": true, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-001": { @@ -4858,6 +4884,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://ai.google.dev/pricing#2_0flash" }, "gemini/gemini-2.5-pro-preview-03-25": { @@ -4872,9 +4900,9 @@ "max_pdf_size_mb": 30, "input_cost_per_audio_token": 0.0000007, "input_cost_per_token": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_token_above_200k_tokens": 0.0000025, "output_cost_per_token": 0.0000010, - "output_cost_per_token_above_128k_tokens": 0.000015, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -4885,6 +4913,8 @@ "supports_response_schema": true, "supports_audio_output": false, "supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview" }, "gemini/gemini-2.0-flash-exp": { @@ -4920,6 +4950,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -4946,6 +4978,8 @@ "supports_response_schema": true, "supports_audio_output": false, 
"supports_tool_choice": true, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite" }, "gemini/gemini-2.0-flash-thinking-exp": { @@ -4981,6 +5015,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, @@ -5017,6 +5053,8 @@ "supports_audio_output": true, "tpm": 4000000, "rpm": 10, + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true }, diff --git a/requirements.txt b/requirements.txt index 20ef862715..e1a666f8c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,6 @@ gunicorn==23.0.0 # server dep uvloop==0.21.0 # uvicorn dep, gives us much better performance under load boto3==1.34.34 # aws bedrock/sagemaker calls redis==5.2.1 # redis caching -redisvl==0.4.1 # semantic caching prisma==0.11.0 # for db mangum==0.17.0 # for aws lambda functions pynacl==1.5.0 # for encrypting keys diff --git a/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py new file mode 100644 index 0000000000..8f8f043935 --- /dev/null +++ b/tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py @@ -0,0 +1,50 @@ +import json +import os +import sys + +import pytest +from fastapi.testclient import TestClient + +import litellm +from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import ( + StandardBuiltInToolCostTracking, +) +from litellm.types.llms.openai import 
def test_generic_cost_per_token_above_200k_tokens():
    """
    Check that a prompt above 200k tokens is billed at the `*_above_200k_tokens` rates.

    Both the prompt cost and the completion cost returned by `generic_cost_per_token`
    are compared against the tiered rates read from the model cost map.
    """
    model = "gemini-2.5-pro-exp-03-25"
    custom_llm_provider = "vertex_ai"
    # Presumably makes litellm load its bundled cost map instead of fetching one
    # (url="" is passed below) -- confirm against `get_model_cost_map`.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model_cost_map = litellm.model_cost[model]
    # 220k prompt tokens: an integer count just above the 200k pricing threshold.
    prompt_tokens = 220 * 1000
    completion_tokens = 150
    usage = Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )
    prompt_cost, completion_cost = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider=custom_llm_provider,
    )
    # Rounded comparison avoids spurious failures from float multiplication noise.
    assert round(prompt_cost, 10) == round(
        model_cost_map["input_cost_per_token_above_200k_tokens"] * usage.prompt_tokens,
        10,
    )
    assert round(completion_cost, 10) == round(
        model_cost_map["output_cost_per_token_above_200k_tokens"]
        * usage.completion_tokens,
        10,
    )
test_aaamodel_prices_and_context_window_json_is_valid(): "output_cost_per_second": {"type": "number"}, "output_cost_per_token": {"type": "number"}, "output_cost_per_token_above_128k_tokens": {"type": "number"}, + "output_cost_per_token_above_200k_tokens": {"type": "number"}, "output_cost_per_token_batches": {"type": "number"}, "output_db_cost_per_token": {"type": "number"}, "output_dbu_cost_per_token": {"type": "number"}, @@ -541,6 +543,13 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "enum": ["text", "audio", "image", "video"], }, }, + "supported_output_modalities": { + "type": "array", + "items": { + "type": "string", + "enum": ["text", "image"], + }, + }, "supports_native_streaming": {"type": "boolean"}, }, "additionalProperties": False,