diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 916d68150..b893e6646 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -87,8 +87,8 @@ def cost_per_token( custom_llm_provider: Optional[str] = None, region_name=None, ### CHARACTER PRICING ### - prompt_characters: int = 0, - completion_characters: int = 0, + prompt_characters: Optional[int] = None, + completion_characters: Optional[int] = None, ### PROMPT CACHING PRICING ### - used for anthropic cache_creation_input_tokens: Optional[int] = 0, cache_read_input_tokens: Optional[int] = 0, @@ -201,13 +201,24 @@ def cost_per_token( model = model_without_prefix # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models - print_verbose(f"Looking up model={model} in model_cost_map") + print_verbose( + f"Looking up model={model} in model_cost_map, custom_llm_provider={custom_llm_provider}, call_type={call_type}" + ) if call_type == "speech" or call_type == "aspeech": + if prompt_characters is None: + raise ValueError( + "prompt_characters must be provided for tts calls. prompt_characters={}, model={}, custom_llm_provider={}, call_type={}".format( + prompt_characters, + model, + custom_llm_provider, + call_type, + ) + ) prompt_cost, completion_cost = _generic_cost_per_character( model=model_without_prefix, custom_llm_provider=custom_llm_provider, prompt_characters=prompt_characters, - completion_characters=completion_characters, + completion_characters=0, custom_prompt_cost=None, custom_completion_cost=0, ) @@ -232,10 +243,6 @@ def cost_per_token( cost_router = google_cost_router( model=model_without_prefix, custom_llm_provider=custom_llm_provider, - prompt_characters=prompt_characters, - completion_characters=completion_characters, - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, call_type=call_type, ) if cost_router == "cost_per_character": @@ -542,9 +549,9 @@ def completion_cost( model = "dall-e-2" # for dall-e-2, azure expects an empty model name # Handle Inputs to completion_cost prompt_tokens = 0 - prompt_characters = 0 + prompt_characters: Optional[int] = None completion_tokens = 0 - completion_characters = 0 + completion_characters: Optional[int] = None cache_creation_input_tokens: Optional[int] = None cache_read_input_tokens: Optional[int] = None if completion_response is not None and ( @@ -721,10 +728,8 @@ def completion_cost( prompt_string = litellm.utils.get_formatted_prompt( data={"messages": messages}, call_type="completion" ) - else: - prompt_string = "" - prompt_characters = litellm.utils._count_characters(text=prompt_string) + prompt_characters = litellm.utils._count_characters(text=prompt_string) if completion_response is not None and isinstance( completion_response, ModelResponse ): diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b2b3f5392..9bea6b9a9 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -901,7 +901,9 @@ class Logging: complete_streaming_response = None else: self.sync_streaming_chunks.append(result) - + _caching_complete_streaming_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = None if complete_streaming_response is not None: verbose_logger.debug( "Logging Details LiteLLM-Success Call streaming complete" @@ -909,6 +911,9 @@ class Logging: self.model_call_details["complete_streaming_response"] = ( complete_streaming_response ) + _caching_complete_streaming_response = 
copy.deepcopy( + complete_streaming_response + ) self.model_call_details["response_cost"] = ( self._response_cost_calculator(result=complete_streaming_response) ) @@ -937,6 +942,20 @@ class Logging: else: callbacks = litellm.success_callback + ## STREAMING CACHING ## + if "cache" in callbacks and litellm.cache is not None: + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + print_verbose("success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream and _caching_complete_streaming_response is not None: + print_verbose( + "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" + ) + result = _caching_complete_streaming_response + # only add to cache once we have a complete streaming response + litellm.cache.add_cache(result, **kwargs) + + ## REDACT MESSAGES ## result = redact_message_input_output_from_logging( model_call_details=( self.model_call_details @@ -1302,23 +1321,6 @@ class Logging: end_time=end_time, print_verbose=print_verbose, ) - if callback == "cache" and litellm.cache is not None: - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - print_verbose("success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "complete_streaming_response" not in kwargs: - print_verbose( - f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" - ) - result = kwargs["complete_streaming_response"] - # only add to cache once we have a complete streaming response - litellm.cache.add_cache(result, **kwargs) if callback == "athina" and athinaLogger is not None: deep_copy = {} for k, v in self.model_call_details.items(): diff --git a/litellm/litellm_core_utils/llm_cost_calc/google.py b/litellm/litellm_core_utils/llm_cost_calc/google.py index b42376884..cad907cd6 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/google.py +++ b/litellm/litellm_core_utils/llm_cost_calc/google.py @@ -32,10 +32,6 @@ def _is_above_128k(tokens: float) -> bool: def cost_router( model: str, custom_llm_provider: str, - prompt_tokens: float, - completion_tokens: float, - prompt_characters: float, - completion_characters: float, call_type: Union[Literal["embedding", "aembedding"], str], ) -> Literal["cost_per_character", "cost_per_token"]: """ @@ -66,8 +62,8 @@ def cost_per_character( custom_llm_provider: str, prompt_tokens: float, completion_tokens: float, - prompt_characters: float, - completion_characters: float, + prompt_characters: Optional[float] = None, + completion_characters: Optional[float] = None, ) -> Tuple[float, float]: """ Calculates the cost per character for a given VertexAI model, input messages, and response object. 
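# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): how the slimmed-down cost_router
# and the now-optional character counts fit together. Model name, provider and
# token counts below are assumptions for illustration; the call pattern mirrors
# the cost_per_token() call site shown earlier in this diff.
# ---------------------------------------------------------------------------
from litellm.litellm_core_utils.llm_cost_calc.google import (
    cost_per_character,
    cost_router as google_cost_router,
)

route = google_cost_router(
    model="gemini-1.5-pro",
    custom_llm_provider="vertex_ai",
    call_type="completion",
)
if route == "cost_per_character":
    prompt_cost, completion_cost = cost_per_character(
        model="gemini-1.5-pro",
        custom_llm_provider="vertex_ai",
        prompt_tokens=120,
        completion_tokens=40,
        prompt_characters=None,      # None now falls back to token-based pricing
        completion_characters=None,  # (see the reworked branches further down)
    )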
@@ -94,87 +90,100 @@ def cost_per_character( ) ## CALCULATE INPUT COST - try: - if ( - _is_above_128k(tokens=prompt_characters * 4) # 1 token = 4 char - and model not in models_without_dynamic_pricing - ): - ## check if character pricing, else default to token pricing - assert ( - "input_cost_per_character_above_128k_tokens" in model_info - and model_info["input_cost_per_character_above_128k_tokens"] is not None - ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format( - model, model_info - ) - prompt_cost = ( - prompt_characters - * model_info["input_cost_per_character_above_128k_tokens"] - ) - else: - assert ( - "input_cost_per_character" in model_info - and model_info["input_cost_per_character"] is not None - ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format( - model, model_info - ) - prompt_cost = prompt_characters * model_info["input_cost_per_character"] - except Exception as e: - verbose_logger.exception( - "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Defaulting to (cost_per_token * 4) calculation for prompt_cost. Exception occured - {}".format( - str(e) - ) - ) - initial_prompt_cost, _ = cost_per_token( + if prompt_characters is None: + prompt_cost, _ = cost_per_token( model=model, custom_llm_provider=custom_llm_provider, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, ) - - prompt_cost = initial_prompt_cost * 4 + else: + try: + if ( + _is_above_128k(tokens=prompt_characters * 4) # 1 token = 4 char + and model not in models_without_dynamic_pricing + ): + ## check if character pricing, else default to token pricing + assert ( + "input_cost_per_character_above_128k_tokens" in model_info + and model_info["input_cost_per_character_above_128k_tokens"] + is not None + ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format( + model, model_info + ) + prompt_cost = ( + prompt_characters + * model_info["input_cost_per_character_above_128k_tokens"] + ) + else: + assert ( + "input_cost_per_character" in model_info + and model_info["input_cost_per_character"] is not None + ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format( + model, model_info + ) + prompt_cost = prompt_characters * model_info["input_cost_per_character"] + except Exception as e: + verbose_logger.debug( + "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format( + str(e) + ) + ) + prompt_cost, _ = cost_per_token( + model=model, + custom_llm_provider=custom_llm_provider, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) ## CALCULATE OUTPUT COST - try: - if ( - _is_above_128k(tokens=completion_characters * 4) # 1 token = 4 char - and model not in models_without_dynamic_pricing - ): - assert ( - "output_cost_per_character_above_128k_tokens" in model_info - and model_info["output_cost_per_character_above_128k_tokens"] - is not None - ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format( - model, model_info - ) - completion_cost = ( - completion_tokens - * model_info["output_cost_per_character_above_128k_tokens"] - ) - else: - assert ( - "output_cost_per_character" in model_info - and model_info["output_cost_per_character"] is not None - ), "model info for model={} does not have 
'output_cost_per_character'-pricing\nmodel_info={}".format( - model, model_info - ) - completion_cost = ( - completion_tokens * model_info["output_cost_per_character"] - ) - except Exception as e: - verbose_logger.exception( - "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): \ - Defaulting to (cost_per_token * 4) calculation for completion_cost\nException occured - {}".format( - str(e) - ) - ) - _, initial_completion_cost = cost_per_token( + if completion_characters is None: + _, completion_cost = cost_per_token( model=model, custom_llm_provider=custom_llm_provider, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, ) + else: + try: + if ( + _is_above_128k(tokens=completion_characters * 4) # 1 token = 4 char + and model not in models_without_dynamic_pricing + ): + assert ( + "output_cost_per_character_above_128k_tokens" in model_info + and model_info["output_cost_per_character_above_128k_tokens"] + is not None + ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format( + model, model_info + ) + completion_cost = ( + completion_tokens + * model_info["output_cost_per_character_above_128k_tokens"] + ) + else: + assert ( + "output_cost_per_character" in model_info + and model_info["output_cost_per_character"] is not None + ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format( + model, model_info + ) + completion_cost = ( + completion_characters * model_info["output_cost_per_character"] + ) + except Exception as e: + verbose_logger.debug( + "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format( + str(e) + ) + ) + _, completion_cost = cost_per_token( + model=model, + custom_llm_provider=custom_llm_provider, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) - completion_cost = initial_completion_cost * 4 return prompt_cost, completion_cost diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py index 87799bc1f..b97e2f4f0 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/utils.py +++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py @@ -17,9 +17,8 @@ def _generic_cost_per_character( custom_completion_cost: Optional[float], ) -> Tuple[Optional[float], Optional[float]]: """ - Generic function to help calculate cost per character. - """ - """ + Calculates cost per character for aspeech/speech calls. + Calculates the cost per character for a given model, input messages, and response object. Input: @@ -29,7 +28,7 @@ def _generic_cost_per_character( - completion_characters: float, the number of output characters Returns: - Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd. + Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd. - returns None if not able to calculate cost. 
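# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the speech/tts pricing path that
# _generic_cost_per_character backs. With this change, cost_per_token() requires
# an explicit prompt_characters count for call_type="speech"/"aspeech" and pins
# completion_characters to 0; omitting prompt_characters now raises a ValueError
# instead of silently pricing zero characters. Model name and text are
# assumptions for illustration.
# ---------------------------------------------------------------------------
import litellm

text = "Hello, welcome to litellm."
prompt_cost, completion_cost = litellm.cost_per_token(
    model="tts-1",
    custom_llm_provider="openai",
    prompt_tokens=0,
    completion_tokens=0,
    prompt_characters=len(text),
    call_type="speech",
)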
Raises: diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py index 0e4c253b7..15928fccd 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py @@ -7,6 +7,7 @@ import os import time import types import uuid +from copy import deepcopy from enum import Enum from functools import partial from typing import ( @@ -65,9 +66,11 @@ from litellm.types.llms.vertex_ai import ( from litellm.types.utils import GenericStreamingChunk from litellm.utils import CustomStreamWrapper, ModelResponse, Usage +from ....utils import _remove_additional_properties, _remove_strict_from_schema from ...base import BaseLLM from ..common_utils import ( VertexAIError, + _build_vertex_schema, _get_gemini_url, _get_vertex_url, all_gemini_url_modes, @@ -376,7 +379,10 @@ class VertexGeminiConfig: def _map_function(self, value: List[dict]) -> List[Tools]: gtool_func_declarations = [] googleSearchRetrieval: Optional[dict] = None - + # remove 'additionalProperties' from tools + value = _remove_additional_properties(value) + # remove 'strict' from tools + value = _remove_strict_from_schema(value) for tool in value: openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = ( None @@ -437,6 +443,10 @@ class VertexGeminiConfig: if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_output_tokens"] = value if param == "response_format" and isinstance(value, dict): # type: ignore + # remove 'additionalProperties' from json schema + value = _remove_additional_properties(value) + # remove 'strict' from json schema + value = _remove_strict_from_schema(value) if value["type"] == "json_object": optional_params["response_mime_type"] = "application/json" elif value["type"] == "text": @@ -448,6 +458,19 @@ class VertexGeminiConfig: if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore optional_params["response_mime_type"] = "application/json" optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore + + if "response_schema" in optional_params and isinstance( + optional_params["response_schema"], dict + ): + old_schema = deepcopy(optional_params["response_schema"]) + + if isinstance(old_schema, list): + for item in old_schema: + if isinstance(item, dict): + item = _build_vertex_schema(parameters=item) + elif isinstance(old_schema, dict): + old_schema = _build_vertex_schema(parameters=old_schema) + optional_params["response_schema"] = old_schema if param == "frequency_penalty": optional_params["frequency_penalty"] = value if param == "presence_penalty": diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 0f1ad77a6..a7d9cdfde 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -2106,20 +2106,20 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - 
"output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2132,20 +2132,20 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2158,20 +2158,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + 
"input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2184,20 +2184,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2210,20 +2210,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + 
"input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2236,20 +2236,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -2267,20 +2267,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2299,20 +2299,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - 
"input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2331,20 +2331,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2363,20 +2363,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - 
"output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2395,20 +2395,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 4920e29ba..536976ce4 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -6,5 +6,7 @@ model_list: api_base: os.environ/AZURE_API_BASE litellm_settings: - success_callback: ["langfuse"] - max_internal_user_budget: 10 \ No newline at end of file + turn_off_message_logging: true + cache: True + cache_params: + type: local \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 15266ad34..28706c69a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2771,6 +2771,11 @@ def get_optional_params_embeddings( def _remove_additional_properties(schema): + """ + clean out 'additionalProperties = False'. 
Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240 + + Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088 + """ if isinstance(schema, dict): # Remove the 'additionalProperties' key if it exists and is set to False if "additionalProperties" in schema and schema["additionalProperties"] is False: @@ -2789,6 +2794,9 @@ def _remove_additional_properties(schema): def _remove_strict_from_schema(schema): + """ + Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088 + """ if isinstance(schema, dict): # Remove the 'additionalProperties' key if it exists and is set to False if "strict" in schema: @@ -3000,37 +3008,6 @@ def get_optional_params( non_default_params["response_format"] = type_to_response_format_param( response_format=non_default_params["response_format"] ) - # # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240 - if ( - non_default_params["response_format"] is not None - and non_default_params["response_format"] - .get("json_schema", {}) - .get("schema") - is not None - and custom_llm_provider - in [ - "gemini", - "vertex_ai", - "vertex_ai_beta", - ] - ): - from litellm.llms.vertex_ai_and_google_ai_studio.common_utils import ( - _build_vertex_schema, - ) - - old_schema = copy.deepcopy( - non_default_params["response_format"] - .get("json_schema", {}) - .get("schema") - ) - new_schema = _remove_additional_properties(schema=old_schema) - if isinstance(new_schema, list): - for item in new_schema: - if isinstance(item, dict): - item = _build_vertex_schema(parameters=item) - elif isinstance(new_schema, dict): - new_schema = _build_vertex_schema(parameters=new_schema) - non_default_params["response_format"]["json_schema"]["schema"] = new_schema if "tools" in non_default_params and isinstance( non_default_params, list ): # fixes https://github.com/BerriAI/litellm/issues/4933 @@ -3197,7 +3174,7 @@ def get_optional_params( if stream: optional_params["stream"] = stream - #return optional_params + # return optional_params if max_tokens is not None: if "vicuna" in model or "flan" in model: optional_params["max_length"] = max_tokens @@ -4900,6 +4877,10 @@ def _strip_model_name(model: str) -> str: return strip_finetune +def _get_model_info_from_model_cost(key: str) -> dict: + return litellm.model_cost[key] + + def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo: """ Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model. @@ -5041,14 +5022,16 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod """ Check if: (in order of specificity) 1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq" - 2. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given. - 3. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given. - 4. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None + 2. 'model' in litellm.model_cost. 
Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None + 3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given. + 4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given. 5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" """ + _model_info: Optional[Dict[str, Any]] = None + key: Optional[str] = None if combined_model_name in litellm.model_cost: key = combined_model_name - _model_info = litellm.model_cost[combined_model_name] + _model_info = _get_model_info_from_model_cost(key=key) _model_info["supported_openai_params"] = supported_openai_params if ( "litellm_provider" in _model_info @@ -5059,58 +5042,10 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod ].startswith("vertex_ai"): pass else: - raise Exception - elif combined_stripped_model_name in litellm.model_cost: - key = combined_stripped_model_name - _model_info = litellm.model_cost[combined_stripped_model_name] - _model_info["supported_openai_params"] = supported_openai_params - if ( - "litellm_provider" in _model_info - and _model_info["litellm_provider"] != custom_llm_provider - ): - if custom_llm_provider == "vertex_ai" and _model_info[ - "litellm_provider" - ].startswith("vertex_ai"): - pass - elif custom_llm_provider == "fireworks_ai" and _model_info[ - "litellm_provider" - ].startswith("fireworks_ai"): - pass - else: - raise Exception( - "Got provider={}, Expected provider={}, for model={}".format( - _model_info["litellm_provider"], - custom_llm_provider, - model, - ) - ) - elif stripped_model_name in litellm.model_cost: - key = stripped_model_name - _model_info = litellm.model_cost[stripped_model_name] - _model_info["supported_openai_params"] = supported_openai_params - if ( - "litellm_provider" in _model_info - and _model_info["litellm_provider"] != custom_llm_provider - ): - if custom_llm_provider == "vertex_ai" and _model_info[ - "litellm_provider" - ].startswith("vertex_ai"): - pass - elif custom_llm_provider == "fireworks_ai" and _model_info[ - "litellm_provider" - ].startswith("fireworks_ai"): - pass - else: - raise Exception( - "Got provider={}, Expected provider={}, for model={}".format( - _model_info["litellm_provider"], - custom_llm_provider, - model, - ) - ) - elif model in litellm.model_cost: + _model_info = None + if _model_info is None and model in litellm.model_cost: key = model - _model_info = litellm.model_cost[model] + _model_info = _get_model_info_from_model_cost(key=key) _model_info["supported_openai_params"] = supported_openai_params if ( "litellm_provider" in _model_info @@ -5125,10 +5060,50 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod ].startswith("fireworks_ai"): pass else: - raise Exception - elif split_model in litellm.model_cost: + _model_info = None + if ( + _model_info is None + and combined_stripped_model_name in litellm.model_cost + ): + key = combined_stripped_model_name + _model_info = _get_model_info_from_model_cost(key=key) + _model_info["supported_openai_params"] = supported_openai_params + if ( + "litellm_provider" in _model_info + and _model_info["litellm_provider"] != custom_llm_provider + ): + if custom_llm_provider == "vertex_ai" and _model_info[ + "litellm_provider" + ].startswith("vertex_ai"): + pass + elif 
custom_llm_provider == "fireworks_ai" and _model_info[ + "litellm_provider" + ].startswith("fireworks_ai"): + pass + else: + _model_info = None + if _model_info is None and stripped_model_name in litellm.model_cost: + key = stripped_model_name + _model_info = _get_model_info_from_model_cost(key=key) + _model_info["supported_openai_params"] = supported_openai_params + if ( + "litellm_provider" in _model_info + and _model_info["litellm_provider"] != custom_llm_provider + ): + if custom_llm_provider == "vertex_ai" and _model_info[ + "litellm_provider" + ].startswith("vertex_ai"): + pass + elif custom_llm_provider == "fireworks_ai" and _model_info[ + "litellm_provider" + ].startswith("fireworks_ai"): + pass + else: + _model_info = None + + if _model_info is None and split_model in litellm.model_cost: key = split_model - _model_info = litellm.model_cost[split_model] + _model_info = _get_model_info_from_model_cost(key=key) _model_info["supported_openai_params"] = supported_openai_params if ( "litellm_provider" in _model_info @@ -5143,8 +5118,8 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod ].startswith("fireworks_ai"): pass else: - raise Exception - else: + _model_info = None + if _model_info is None or key is None: raise ValueError( "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" ) @@ -5212,7 +5187,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod litellm_provider=_model_info.get( "litellm_provider", custom_llm_provider ), - mode=_model_info.get("mode"), + mode=_model_info.get("mode"), # type: ignore supported_openai_params=supported_openai_params, supports_system_messages=_model_info.get( "supports_system_messages", None @@ -9260,10 +9235,6 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> di processed_headers[k] = v else: additional_headers["{}-{}".format("llm_provider", k)] = v - ## GUARANTEE OPENAI HEADERS IN RESPONSE - for item in OPENAI_RESPONSE_HEADERS: - if item not in openai_headers: - openai_headers[item] = None additional_headers = { **openai_headers, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 0f1ad77a6..a7d9cdfde 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2106,20 +2106,20 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + 
"input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2132,20 +2132,20 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2158,20 +2158,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", 
"supports_system_messages": true, @@ -2184,20 +2184,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2210,20 +2210,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - "input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2236,20 +2236,20 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_image": 0.001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_video_per_second": 0.001315, - "input_cost_per_token": 0.000005, - "input_cost_per_character": 0.00000125, - "input_cost_per_token_above_128k_tokens": 0.00001, - 
"input_cost_per_character_above_128k_tokens": 0.0000025, - "output_cost_per_token": 0.000015, - "output_cost_per_character": 0.00000375, - "output_cost_per_token_above_128k_tokens": 0.00003, - "output_cost_per_character_above_128k_tokens": 0.0000075, - "output_cost_per_image": 0.00263, - "output_cost_per_video_per_second": 0.00263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image": 0.00032875, + "input_cost_per_audio_per_second": 0.00003125, + "input_cost_per_video_per_second": 0.00032875, + "input_cost_per_token": 0.000000078125, + "input_cost_per_character": 0.0000003125, + "input_cost_per_image_above_128k_tokens": 0.0006575, + "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.00000015625, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000003125, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -2267,20 +2267,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2299,20 +2299,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 
0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2331,20 +2331,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2363,20 +2363,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2395,20 +2395,20 
@@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 0.0001315, - "input_cost_per_video_per_second": 0.0001315, - "input_cost_per_audio_per_second": 0.000125, - "input_cost_per_token": 0.0000005, - "input_cost_per_character": 0.000000125, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 0.000000004688, + "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, - "output_cost_per_token": 0.0000015, - "output_cost_per_character": 0.000000375, - "output_cost_per_token_above_128k_tokens": 0.000003, - "output_cost_per_character_above_128k_tokens": 0.00000075, - "output_cost_per_image": 0.000263, - "output_cost_per_video_per_second": 0.000263, - "output_cost_per_audio_per_second": 0.00025, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 0.0000000046875, + "output_cost_per_character": 0.00000001875, + "output_cost_per_token_above_128k_tokens": 0.000000009375, + "output_cost_per_character_above_128k_tokens": 0.0000000375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index ff7d91e0a..d7182474d 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -664,9 +664,39 @@ def test_unmapped_gemini_model_params(): assert optional_params["stop_sequences"] == ["stop_word"] -def test_drop_nested_params_vllm(): +def _check_additional_properties(schema): + if isinstance(schema, dict): + # Remove the 'additionalProperties' key if it exists and is set to False + if "additionalProperties" in schema or "strict" in schema: + raise ValueError( + "additionalProperties and strict should not be in the schema" + ) + + # Recursively process all dictionary values + for key, value in schema.items(): + _check_additional_properties(value) + + elif isinstance(schema, list): + # Recursively process all items in the list + for item in schema: + _check_additional_properties(item) + + return schema + + +@pytest.mark.parametrize( + "provider, model", + [ + ("hosted_vllm", "my-vllm-model"), + ("gemini", "gemini-1.5-pro"), + ("vertex_ai", "gemini-1.5-pro"), + ], +) +def test_drop_nested_params_add_prop_and_strict(provider, model): """ Relevant issue - https://github.com/BerriAI/litellm/issues/5288 + + Relevant issue - https://github.com/BerriAI/litellm/issues/6136 """ tools = [ { @@ -690,8 +720,8 @@ def test_drop_nested_params_vllm(): ] tool_choice = {"type": "function", "function": {"name": "structure_output"}} optional_params = get_optional_params( - model="my-vllm-model", - custom_llm_provider="hosted_vllm", + model=model, + custom_llm_provider=provider, temperature=0.2, tools=tools, tool_choice=tool_choice, @@ -700,7 +730,5 @@ def test_drop_nested_params_vllm(): ["tools", "function", "additionalProperties"], ], ) - print(optional_params["tools"][0]["function"]) - assert "additionalProperties" not in optional_params["tools"][0]["function"] - assert "strict" not in optional_params["tools"][0]["function"] + _check_additional_properties(optional_params["tools"]) diff --git 
diff --git a/tests/llm_translation/test_vertex.py b/tests/llm_translation/test_vertex.py
new file mode 100644
index 000000000..2847ed371
--- /dev/null
+++ b/tests/llm_translation/test_vertex.py
@@ -0,0 +1,83 @@
+import json
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+from unittest.mock import AsyncMock, MagicMock, patch
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+
+
+def test_completion_pydantic_obj_2():
+    from pydantic import BaseModel
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    litellm.set_verbose = True
+
+    class CalendarEvent(BaseModel):
+        name: str
+        date: str
+        participants: list[str]
+
+    class EventsList(BaseModel):
+        events: list[CalendarEvent]
+
+    messages = [
+        {"role": "user", "content": "List important events from the 20th century."}
+    ]
+    expected_request_body = {
+        "contents": [
+            {
+                "role": "user",
+                "parts": [{"text": "List important events from the 20th century."}],
+            }
+        ],
+        "generationConfig": {
+            "response_mime_type": "application/json",
+            "response_schema": {
+                "properties": {
+                    "events": {
+                        "items": {
+                            "properties": {
+                                "name": {"type": "string"},
+                                "date": {"type": "string"},
+                                "participants": {
+                                    "items": {"type": "string"},
+                                    "type": "array",
+                                },
+                            },
+                            "type": "object",
+                        },
+                        "type": "array",
+                    }
+                },
+                "type": "object",
+            },
+        },
+    }
+    client = HTTPHandler()
+    with patch.object(client, "post", new=MagicMock()) as mock_post:
+        mock_post.return_value = expected_request_body
+        try:
+            litellm.completion(
+                model="gemini/gemini-1.5-pro",
+                messages=messages,
+                response_format=EventsList,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        mock_post.assert_called_once()
+
+        print(mock_post.call_args.kwargs)
+
+        assert mock_post.call_args.kwargs["json"] == expected_request_body
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index 0e9d1f6f2..a98b47603 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -2209,3 +2209,28 @@ async def test_redis_proxy_batch_redis_get_cache():
     print(response._hidden_params)
 
     assert "cache_key" in response._hidden_params
+
+
+def test_logging_turn_off_message_logging_streaming():
+    litellm.turn_off_message_logging = True
+    mock_obj = Cache(type="local")
+    litellm.cache = mock_obj
+
+    with patch.object(mock_obj, "add_cache", new=MagicMock()) as mock_client:
+        print(f"mock_obj.add_cache: {mock_obj.add_cache}")
+
+        resp = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "hi"}],
+            mock_response="hello",
+            stream=True,
+        )
+
+        for chunk in resp:
+            continue
+
+        time.sleep(1)
+
+        mock_client.assert_called_once()
+
+        assert mock_client.call_args.args[0].choices[0].message.content == "hello"
diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py
index 76241985f..bcf57050f 100644
--- a/tests/local_testing/test_completion.py
+++ b/tests/local_testing/test_completion.py
@@ -1711,31 +1711,6 @@ def test_completion_perplexity_api():
 # test_completion_perplexity_api()
 
 
-@pytest.mark.skip(
-    reason="too many requests. Hitting gemini rate limits. Convert to mock test."
-)
-def test_completion_pydantic_obj_2():
-    from pydantic import BaseModel
-
-    litellm.set_verbose = True
-
-    class CalendarEvent(BaseModel):
-        name: str
-        date: str
-        participants: list[str]
-
-    class EventsList(BaseModel):
-        events: list[CalendarEvent]
-
-    messages = [
-        {"role": "user", "content": "List important events from the 20th century."}
-    ]
-
-    response = litellm.completion(
-        model="gemini/gemini-1.5-pro", messages=messages, response_format=EventsList
-    )
-
-
 @pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api_2():
     try:
@@ -4573,12 +4548,7 @@ async def test_completion_ai21_chat():
 
 @pytest.mark.parametrize(
     "model",
-    [
-        "gpt-4o",
-        "azure/chatgpt-v-2",
-        "claude-3-sonnet-20240229",
-        "fireworks_ai/mixtral-8x7b-instruct",
-    ],
+    ["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
 )
 @pytest.mark.parametrize(
     "stream",
@@ -4594,5 +4564,7 @@ def test_completion_response_ratelimit_headers(model, stream):
     additional_headers = hidden_params.get("additional_headers", {})
 
     print(additional_headers)
+    for k, v in additional_headers.items():
+        assert v != "None" and v is not None
     assert "x-ratelimit-remaining-requests" in additional_headers
     assert "x-ratelimit-remaining-tokens" in additional_headers
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index b220e94ea..584f8c841 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2359,3 +2359,131 @@ def test_together_ai_embedding_completion_cost():
         custom_llm_provider="together_ai",
         call_type="embedding",
     )
+
+
+def test_completion_cost_params():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+    """
+    litellm.set_verbose = True
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_tokens=1000,
+        completion_tokens=1000,
+        custom_llm_provider="vertex_ai_beta",
+    )
+
+    resp2_prompt_cost, resp2_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp2_prompt_cost > 0
+
+    assert resp1_prompt_cost == resp2_prompt_cost
+    assert resp1_completion_cost == resp2_completion_cost
+
+    resp3_prompt_cost, resp3_completion_cost = cost_per_token(
+        model="vertex_ai/gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp3_prompt_cost > 0
+
+    assert resp3_prompt_cost == resp1_prompt_cost
+    assert resp3_completion_cost == resp1_completion_cost
+
+
+def test_completion_cost_params_2():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+    """
+    litellm.set_verbose = True
+
+    prompt_characters = 1000
+    completion_characters = 1000
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_characters=prompt_characters,
+        completion_characters=completion_characters,
+        prompt_tokens=1000,
+        completion_tokens=1000,
+    )
+
+    print(resp1_prompt_cost, resp1_completion_cost)
+
+    model_info = litellm.get_model_info("gemini-1.5-pro-002")
+    input_cost_per_character = model_info["input_cost_per_character"]
+    output_cost_per_character = model_info["output_cost_per_character"]
+
+    assert resp1_prompt_cost == input_cost_per_character * prompt_characters
+    assert resp1_completion_cost == output_cost_per_character * completion_characters
+
+
+def test_completion_cost_params_gemini_3():
+    from litellm.utils import Choices, Message, ModelResponse, Usage
+
+    from litellm.litellm_core_utils.llm_cost_calc.google import cost_per_character
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    response = ModelResponse(
+        id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    content="Sí. \n",
+                    role="assistant",
+                    tool_calls=None,
+                    function_call=None,
+                ),
+            )
+        ],
+        created=1728529259,
+        model="gemini-1.5-flash",
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=Usage(
+            completion_tokens=2,
+            prompt_tokens=3771,
+            total_tokens=3773,
+            completion_tokens_details=None,
+            prompt_tokens_details=None,
+        ),
+        vertex_ai_grounding_metadata=[],
+        vertex_ai_safety_results=[
+            [
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "probability": "NEGLIGIBLE",
+                },
+                {"category": "HARM_CATEGORY_HATE_SPEECH", "probability": "NEGLIGIBLE"},
+                {"category": "HARM_CATEGORY_HARASSMENT", "probability": "NEGLIGIBLE"},
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "probability": "NEGLIGIBLE",
+                },
+            ]
+        ],
+        vertex_ai_citation_metadata=[],
+    )
+
+    pc, cc = cost_per_character(
+        **{
+            "model": "gemini-1.5-flash",
+            "custom_llm_provider": "vertex_ai",
+            "prompt_tokens": 3771,
+            "completion_tokens": 2,
+            "prompt_characters": None,
+            "completion_characters": 3,
+        }
+    )
+
+    model_info = litellm.get_model_info("gemini-1.5-flash")
+
+    assert round(pc, 10) == round(3771 * model_info["input_cost_per_token"], 10)
+    assert round(cc, 10) == round(
+        3 * model_info["output_cost_per_character"],
+        10,
+    )
diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py
index 6c2b3a646..384b4b6fd 100644
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@@ -1414,6 +1414,7 @@ def test_logging_standard_payload_llm_headers(stream):
     with patch.object(
         customHandler, "log_success_event", new=MagicMock()
     ) as mock_client:
+
         resp = litellm.completion(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index 19c72ab32..20f9aa16e 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -68,3 +68,9 @@ def test_get_model_info_finetuned_models():
     info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
     print("info", info)
     assert info["input_cost_per_token"] == 0.000003
+
+
+def test_get_model_info_gemini_pro():
+    info = litellm.get_model_info("gemini-1.5-pro-002")
+    print("info", info)
+    assert info["key"] == "gemini-1.5-pro-002"