LiteLLM Minor Fixes & Improvements (10/09/2024) (#6139)

* fix(utils.py): don't return 'none' response headers

Fixes https://github.com/BerriAI/litellm/issues/6123

* fix(vertex_and_google_ai_studio_gemini.py): support stripping 'additionalProperties' and 'strict' values from tool call schemas

Fixes https://github.com/BerriAI/litellm/issues/6136
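The offending schemas look like this: a hedged sketch of an OpenAI-style strict tool definition that Gemini rejects until LiteLLM strips the 'strict' and 'additionalProperties' keys (tool name and model alias are illustrative):

import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # illustrative tool
            "strict": True,  # removed by _remove_strict_from_schema before the Gemini call
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
                "additionalProperties": False,  # removed by _remove_additional_properties
            },
        },
    }
]

response = litellm.completion(
    model="gemini/gemini-1.5-flash",  # assumed model alias
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)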

* fix(cost_calculator.py): set default character value to none

Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403290196

* fix(google.py): fix cost per token / cost per char conversion

Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403370287

* build(model_prices_and_context_window.json): update gemini pricing

Fixes https://github.com/BerriAI/litellm/issues/6133

* build(model_prices_and_context_window.json): update gemini pricing

* fix(litellm_logging.py): fix streaming cache logging when 'turn_off_message_logging' is enabled

Stores the unredacted response in the cache, so cache hits return real content even when logged messages are redacted
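A minimal reproduction sketch of the setup this fixes, using LiteLLM's module-level settings (model name is illustrative):

import litellm

litellm.turn_off_message_logging = True      # logging callbacks see redacted content
litellm.cache = litellm.Cache(type="local")  # in-memory cache

response = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
)
for _chunk in response:
    pass
# Once the stream completes, the cache stores the full unredacted response
# instead of a redacted placeholder.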

* build(model_prices_and_context_window.json): update gemini-1.5-flash pricing

* fix(cost_calculator.py): fix default prompt_character count logic

Fixes error in gemini cost calculation

* fix(cost_calculator.py): fix cost calc for tts models
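TTS calls are priced on input characters, so the calculator now requires a character count for the speech/aspeech call types. A worked sketch (the per-character rate is illustrative, not a real price):

input_text = "Welcome to LiteLLM."
prompt_characters = len(input_text)    # now mandatory for speech/aspeech calls
illustrative_rate_per_char = 0.000015  # assumed rate, for arithmetic only
prompt_cost = prompt_characters * illustrative_rate_per_char  # 19 chars -> 0.000285 USD
completion_cost = 0.0                  # tts output is audio; completion_characters is 0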
Commit 6005450c8f by Krish Dholakia, 2024-10-10 00:42:11 -07:00, committed by GitHub. Parent 60baa65e0e.
16 changed files with 788 additions and 534 deletions.

=== cost_calculator.py ===

@@ -87,8 +87,8 @@ def cost_per_token(
     custom_llm_provider: Optional[str] = None,
     region_name=None,
     ### CHARACTER PRICING ###
-    prompt_characters: int = 0,
-    completion_characters: int = 0,
+    prompt_characters: Optional[int] = None,
+    completion_characters: Optional[int] = None,
     ### PROMPT CACHING PRICING ### - used for anthropic
     cache_creation_input_tokens: Optional[int] = 0,
     cache_read_input_tokens: Optional[int] = 0,
@@ -201,13 +201,24 @@ def cost_per_token(
         model = model_without_prefix
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
-    print_verbose(f"Looking up model={model} in model_cost_map")
+    print_verbose(
+        f"Looking up model={model} in model_cost_map, custom_llm_provider={custom_llm_provider}, call_type={call_type}"
+    )
     if call_type == "speech" or call_type == "aspeech":
+        if prompt_characters is None:
+            raise ValueError(
+                "prompt_characters must be provided for tts calls. prompt_characters={}, model={}, custom_llm_provider={}, call_type={}".format(
+                    prompt_characters,
+                    model,
+                    custom_llm_provider,
+                    call_type,
+                )
+            )
         prompt_cost, completion_cost = _generic_cost_per_character(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
             prompt_characters=prompt_characters,
-            completion_characters=completion_characters,
+            completion_characters=0,
             custom_prompt_cost=None,
             custom_completion_cost=0,
         )
@@ -232,10 +243,6 @@ def cost_per_token(
         cost_router = google_cost_router(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
-            prompt_characters=prompt_characters,
-            completion_characters=completion_characters,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
             call_type=call_type,
         )
         if cost_router == "cost_per_character":
@@ -542,9 +549,9 @@ def completion_cost(
             model = "dall-e-2"  # for dall-e-2, azure expects an empty model name
     # Handle Inputs to completion_cost
     prompt_tokens = 0
-    prompt_characters = 0
+    prompt_characters: Optional[int] = None
    completion_tokens = 0
-    completion_characters = 0
+    completion_characters: Optional[int] = None
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None
    if completion_response is not None and (
@@ -721,10 +728,8 @@ def completion_cost(
             prompt_string = litellm.utils.get_formatted_prompt(
                 data={"messages": messages}, call_type="completion"
             )
-        else:
-            prompt_string = ""
-        prompt_characters = litellm.utils._count_characters(text=prompt_string)
+            prompt_characters = litellm.utils._count_characters(text=prompt_string)
         if completion_response is not None and isinstance(
             completion_response, ModelResponse
         ):
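The net effect of these cost_calculator.py hunks: character counts are now Optional and only populated when there is real text to count. A minimal sketch of the new semantics (names simplified, not the actual helper signatures):

from typing import Optional

# Character counts start as None rather than 0, so downstream character-based
# pricing runs only when a real count exists.
prompt_characters: Optional[int] = None
messages = [{"role": "user", "content": "hey"}]
if len(messages) > 0:
    prompt_string = "".join(str(m.get("content", "")) for m in messages)
    prompt_characters = len(prompt_string)  # stand-in for litellm.utils._count_characters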

=== litellm_logging.py ===

@@ -901,7 +901,9 @@ class Logging:
                     complete_streaming_response = None
                 else:
                     self.sync_streaming_chunks.append(result)
+        _caching_complete_streaming_response: Optional[
+            Union[ModelResponse, TextCompletionResponse]
+        ] = None
         if complete_streaming_response is not None:
             verbose_logger.debug(
                 "Logging Details LiteLLM-Success Call streaming complete"
@@ -909,6 +911,9 @@ class Logging:
             self.model_call_details["complete_streaming_response"] = (
                 complete_streaming_response
             )
+            _caching_complete_streaming_response = copy.deepcopy(
+                complete_streaming_response
+            )
             self.model_call_details["response_cost"] = (
                 self._response_cost_calculator(result=complete_streaming_response)
             )
@@ -937,6 +942,20 @@ class Logging:
         else:
             callbacks = litellm.success_callback

+        ## STREAMING CACHING ##
+        if "cache" in callbacks and litellm.cache is not None:
+            # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+            print_verbose("success_callback: reaches cache for logging!")
+            kwargs = self.model_call_details
+            if self.stream and _caching_complete_streaming_response is not None:
+                print_verbose(
+                    "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
+                )
+                result = _caching_complete_streaming_response
+                # only add to cache once we have a complete streaming response
+                litellm.cache.add_cache(result, **kwargs)
+
+        ## REDACT MESSAGES ##
         result = redact_message_input_output_from_logging(
             model_call_details=(
                 self.model_call_details
@@ -1302,23 +1321,6 @@ class Logging:
                         end_time=end_time,
                         print_verbose=print_verbose,
                     )
-                if callback == "cache" and litellm.cache is not None:
-                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                    print_verbose("success_callback: reaches cache for logging!")
-                    kwargs = self.model_call_details
-                    if self.stream:
-                        if "complete_streaming_response" not in kwargs:
-                            print_verbose(
-                                f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
-                            )
-                            pass
-                        else:
-                            print_verbose(
-                                "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
-                            )
-                            result = kwargs["complete_streaming_response"]
-                            # only add to cache once we have a complete streaming response
-                            litellm.cache.add_cache(result, **kwargs)
                 if callback == "athina" and athinaLogger is not None:
                     deep_copy = {}
                     for k, v in self.model_call_details.items():
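The hunks above move the cache write ahead of redaction. As a generic sketch of the ordering (helper names are illustrative, not LiteLLM internals):

import copy

def handle_stream_success(complete_response, redact, add_to_cache):
    # Snapshot BEFORE redaction: the cache must store the unredacted response,
    # otherwise cache hits would replay redacted placeholders.
    unredacted = copy.deepcopy(complete_response)
    add_to_cache(unredacted)
    # Redaction happens afterwards, so logging callbacks never see raw content.
    return redact(complete_response)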

=== llm_cost_calc/google.py ===

@@ -32,10 +32,6 @@ def _is_above_128k(tokens: float) -> bool:
 def cost_router(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
-    prompt_characters: float,
-    completion_characters: float,
     call_type: Union[Literal["embedding", "aembedding"], str],
 ) -> Literal["cost_per_character", "cost_per_token"]:
     """
@@ -66,8 +62,8 @@ def cost_per_character(
     custom_llm_provider: str,
     prompt_tokens: float,
     completion_tokens: float,
-    prompt_characters: float,
-    completion_characters: float,
+    prompt_characters: Optional[float] = None,
+    completion_characters: Optional[float] = None,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per character for a given VertexAI model, input messages, and response object.
@@ -94,87 +90,100 @@ def cost_per_character(
     )
     ## CALCULATE INPUT COST
-    try:
-        if (
-            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
-            and model not in models_without_dynamic_pricing
-        ):
-            ## check if character pricing, else default to token pricing
-            assert (
-                "input_cost_per_character_above_128k_tokens" in model_info
-                and model_info["input_cost_per_character_above_128k_tokens"] is not None
-            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
-                model, model_info
-            )
-            prompt_cost = (
-                prompt_characters
-                * model_info["input_cost_per_character_above_128k_tokens"]
-            )
-        else:
-            assert (
-                "input_cost_per_character" in model_info
-                and model_info["input_cost_per_character"] is not None
-            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
-    except Exception as e:
-        verbose_logger.exception(
-            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Defaulting to (cost_per_token * 4) calculation for prompt_cost. Exception occured - {}".format(
-                str(e)
-            )
-        )
-        initial_prompt_cost, _ = cost_per_token(
-            model=model,
-            custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-        )
-        prompt_cost = initial_prompt_cost * 4
+    if prompt_characters is None:
+        prompt_cost, _ = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+    else:
+        try:
+            if (
+                _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
+                and model not in models_without_dynamic_pricing
+            ):
+                ## check if character pricing, else default to token pricing
+                assert (
+                    "input_cost_per_character_above_128k_tokens" in model_info
+                    and model_info["input_cost_per_character_above_128k_tokens"]
+                    is not None
+                ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
+                    model, model_info
+                )
+                prompt_cost = (
+                    prompt_characters
+                    * model_info["input_cost_per_character_above_128k_tokens"]
+                )
+            else:
+                assert (
+                    "input_cost_per_character" in model_info
+                    and model_info["input_cost_per_character"] is not None
+                ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                prompt_cost = prompt_characters * model_info["input_cost_per_character"]
+        except Exception as e:
+            verbose_logger.debug(
+                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format(
+                    str(e)
+                )
+            )
+            prompt_cost, _ = cost_per_token(
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )

     ## CALCULATE OUTPUT COST
-    try:
-        if (
-            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
-            and model not in models_without_dynamic_pricing
-        ):
-            assert (
-                "output_cost_per_character_above_128k_tokens" in model_info
-                and model_info["output_cost_per_character_above_128k_tokens"]
-                is not None
-            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            completion_cost = (
-                completion_tokens
-                * model_info["output_cost_per_character_above_128k_tokens"]
-            )
-        else:
-            assert (
-                "output_cost_per_character" in model_info
-                and model_info["output_cost_per_character"] is not None
-            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            completion_cost = (
-                completion_tokens * model_info["output_cost_per_character"]
-            )
-    except Exception as e:
-        verbose_logger.exception(
-            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): \
-            Defaulting to (cost_per_token * 4) calculation for completion_cost\nException occured - {}".format(
-                str(e)
-            )
-        )
-        _, initial_completion_cost = cost_per_token(
-            model=model,
-            custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-        )
-        completion_cost = initial_completion_cost * 4
+    if completion_characters is None:
+        _, completion_cost = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+    else:
+        try:
+            if (
+                _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
+                and model not in models_without_dynamic_pricing
+            ):
+                assert (
+                    "output_cost_per_character_above_128k_tokens" in model_info
+                    and model_info["output_cost_per_character_above_128k_tokens"]
+                    is not None
+                ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                completion_cost = (
+                    completion_tokens
+                    * model_info["output_cost_per_character_above_128k_tokens"]
+                )
+            else:
+                assert (
+                    "output_cost_per_character" in model_info
+                    and model_info["output_cost_per_character"] is not None
+                ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                completion_cost = (
+                    completion_characters * model_info["output_cost_per_character"]
+                )
+        except Exception as e:
+            verbose_logger.debug(
+                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format(
+                    str(e)
+                )
+            )
+            _, completion_cost = cost_per_token(
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )

     return prompt_cost, completion_cost
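In short: character pricing is the primary path, and token pricing is now a clean fallback rather than a "* 4" approximation. A simplified sketch of the new flow, with the updated gemini-1.5-pro input rate from the pricing hunks below plugged in for illustration:

from typing import Optional

def input_cost(prompt_characters: Optional[float], prompt_tokens: float,
               cost_per_character: float, cost_per_token: float) -> float:
    # New behavior: fall back to plain token pricing when no character count
    # exists (or on error), instead of multiplying the token cost by 4.
    if prompt_characters is None:
        return prompt_tokens * cost_per_token
    return prompt_characters * cost_per_character

# e.g. 1,000 prompt characters at the updated gemini-1.5-pro input rate:
cost = input_cost(1_000, 0, 0.0000003125, 0.000000078125)  # -> 0.0003125 USD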

=== llm_cost_calc utils (_generic_cost_per_character) ===

@@ -17,9 +17,8 @@ def _generic_cost_per_character(
     custom_completion_cost: Optional[float],
 ) -> Tuple[Optional[float], Optional[float]]:
     """
-    Generic function to help calculate cost per character.
-    """
-    """
+    Calculates cost per character for aspeech/speech calls.
+
     Calculates the cost per character for a given model, input messages, and response object.

     Input:
@@ -29,7 +28,7 @@ def _generic_cost_per_character(
     - completion_characters: float, the number of output characters

     Returns:
         Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
-            - returns None if not able to calculate cost.
+        - returns None if not able to calculate cost.

     Raises:

=== vertex_and_google_ai_studio_gemini.py ===

@@ -7,6 +7,7 @@ import os
 import time
 import types
 import uuid
+from copy import deepcopy
 from enum import Enum
 from functools import partial
 from typing import (
@@ -65,9 +66,11 @@ from litellm.types.llms.vertex_ai import (
 from litellm.types.utils import GenericStreamingChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

+from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...base import BaseLLM
 from ..common_utils import (
     VertexAIError,
+    _build_vertex_schema,
     _get_gemini_url,
     _get_vertex_url,
     all_gemini_url_modes,
@@ -376,7 +379,10 @@ class VertexGeminiConfig:
     def _map_function(self, value: List[dict]) -> List[Tools]:
         gtool_func_declarations = []
         googleSearchRetrieval: Optional[dict] = None
+        # remove 'additionalProperties' from tools
+        value = _remove_additional_properties(value)
+        # remove 'strict' from tools
+        value = _remove_strict_from_schema(value)
         for tool in value:
             openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
                 None
@@ -437,6 +443,10 @@ class VertexGeminiConfig:
             if param == "max_tokens" or param == "max_completion_tokens":
                 optional_params["max_output_tokens"] = value
             if param == "response_format" and isinstance(value, dict):  # type: ignore
+                # remove 'additionalProperties' from json schema
+                value = _remove_additional_properties(value)
+                # remove 'strict' from json schema
+                value = _remove_strict_from_schema(value)
                 if value["type"] == "json_object":
                     optional_params["response_mime_type"] = "application/json"
                 elif value["type"] == "text":
@@ -448,6 +458,19 @@ class VertexGeminiConfig:
                 if "json_schema" in value and "schema" in value["json_schema"]:  # type: ignore
                     optional_params["response_mime_type"] = "application/json"
                     optional_params["response_schema"] = value["json_schema"]["schema"]  # type: ignore
+                if "response_schema" in optional_params and isinstance(
+                    optional_params["response_schema"], dict
+                ):
+                    old_schema = deepcopy(optional_params["response_schema"])
+                    if isinstance(old_schema, list):
+                        for item in old_schema:
+                            if isinstance(item, dict):
+                                item = _build_vertex_schema(parameters=item)
+                    elif isinstance(old_schema, dict):
+                        old_schema = _build_vertex_schema(parameters=old_schema)
+                    optional_params["response_schema"] = old_schema
             if param == "frequency_penalty":
                 optional_params["frequency_penalty"] = value
             if param == "presence_penalty":

=== model_prices_and_context_window.json ===

@@ -2106,20 +2106,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2132,20 +2132,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2158,20 +2158,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2184,20 +2184,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2210,20 +2210,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2236,20 +2236,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
@@ -2267,20 +2267,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2299,20 +2299,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2331,20 +2331,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2363,20 +2363,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2395,20 +2395,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,

=== proxy config (YAML) ===

@@ -6,5 +6,7 @@ model_list:
       api_base: os.environ/AZURE_API_BASE

 litellm_settings:
-  success_callback: ["langfuse"]
-  max_internal_user_budget: 10
+  turn_off_message_logging: true
+  cache: True
+  cache_params:
+    type: local
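A hedged usage sketch for this config: start the proxy with "litellm --config <file>" and point any OpenAI client at it; responses are cached locally and logged with messages redacted. The base URL and key below are the proxy's assumed local defaults:

import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:4000",  # assumed local proxy address
    api_key="sk-1234",               # assumed proxy master key
)
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",  # must match an entry in model_list; illustrative
    messages=[{"role": "user", "content": "hi"}],
)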

=== utils.py ===

@@ -2771,6 +2771,11 @@ def get_optional_params_embeddings(
 def _remove_additional_properties(schema):
+    """
+    clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
+
+    Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088
+    """
     if isinstance(schema, dict):
         # Remove the 'additionalProperties' key if it exists and is set to False
         if "additionalProperties" in schema and schema["additionalProperties"] is False:
@@ -2789,6 +2794,9 @@ def _remove_additional_properties(schema):
 def _remove_strict_from_schema(schema):
+    """
+    Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088
+    """
     if isinstance(schema, dict):
         # Remove the 'additionalProperties' key if it exists and is set to False
         if "strict" in schema:
@@ -3000,37 +3008,6 @@ def get_optional_params(
             non_default_params["response_format"] = type_to_response_format_param(
                 response_format=non_default_params["response_format"]
             )
-        # # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
-        if (
-            non_default_params["response_format"] is not None
-            and non_default_params["response_format"]
-            .get("json_schema", {})
-            .get("schema")
-            is not None
-            and custom_llm_provider
-            in [
-                "gemini",
-                "vertex_ai",
-                "vertex_ai_beta",
-            ]
-        ):
-            from litellm.llms.vertex_ai_and_google_ai_studio.common_utils import (
-                _build_vertex_schema,
-            )
-
-            old_schema = copy.deepcopy(
-                non_default_params["response_format"]
-                .get("json_schema", {})
-                .get("schema")
-            )
-            new_schema = _remove_additional_properties(schema=old_schema)
-
-            if isinstance(new_schema, list):
-                for item in new_schema:
-                    if isinstance(item, dict):
-                        item = _build_vertex_schema(parameters=item)
-            elif isinstance(new_schema, dict):
-                new_schema = _build_vertex_schema(parameters=new_schema)
-            non_default_params["response_format"]["json_schema"]["schema"] = new_schema
     if "tools" in non_default_params and isinstance(
         non_default_params, list
     ):  # fixes https://github.com/BerriAI/litellm/issues/4933
@@ -3197,7 +3174,7 @@ def get_optional_params(
         if stream:
             optional_params["stream"] = stream
-            #return optional_params
+            # return optional_params
     if max_tokens is not None:
         if "vicuna" in model or "flan" in model:
             optional_params["max_length"] = max_tokens
@@ -4900,6 +4877,10 @@ def _strip_model_name(model: str) -> str:
         return strip_finetune

+def _get_model_info_from_model_cost(key: str) -> dict:
+    return litellm.model_cost[key]
+
+
 def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
     """
     Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
@@ -5041,14 +5022,16 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
         """
        Check if: (in order of specificity)
        1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
-        2. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
-        3. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
-        4. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None
+        2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
+        3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
+        4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
        5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
        """
+        _model_info: Optional[Dict[str, Any]] = None
+        key: Optional[str] = None
         if combined_model_name in litellm.model_cost:
             key = combined_model_name
-            _model_info = litellm.model_cost[combined_model_name]
+            _model_info = _get_model_info_from_model_cost(key=key)
             _model_info["supported_openai_params"] = supported_openai_params
             if (
                 "litellm_provider" in _model_info
@@ -5059,58 +5042,10 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
                 ].startswith("vertex_ai"):
                     pass
                 else:
-                    raise Exception
-        elif combined_stripped_model_name in litellm.model_cost:
-            key = combined_stripped_model_name
-            _model_info = litellm.model_cost[combined_stripped_model_name]
-            _model_info["supported_openai_params"] = supported_openai_params
-            if (
-                "litellm_provider" in _model_info
-                and _model_info["litellm_provider"] != custom_llm_provider
-            ):
-                if custom_llm_provider == "vertex_ai" and _model_info[
-                    "litellm_provider"
-                ].startswith("vertex_ai"):
-                    pass
-                elif custom_llm_provider == "fireworks_ai" and _model_info[
-                    "litellm_provider"
-                ].startswith("fireworks_ai"):
-                    pass
-                else:
-                    raise Exception(
-                        "Got provider={}, Expected provider={}, for model={}".format(
-                            _model_info["litellm_provider"],
-                            custom_llm_provider,
-                            model,
-                        )
-                    )
-        elif stripped_model_name in litellm.model_cost:
-            key = stripped_model_name
-            _model_info = litellm.model_cost[stripped_model_name]
-            _model_info["supported_openai_params"] = supported_openai_params
-            if (
-                "litellm_provider" in _model_info
-                and _model_info["litellm_provider"] != custom_llm_provider
-            ):
-                if custom_llm_provider == "vertex_ai" and _model_info[
-                    "litellm_provider"
-                ].startswith("vertex_ai"):
-                    pass
-                elif custom_llm_provider == "fireworks_ai" and _model_info[
-                    "litellm_provider"
-                ].startswith("fireworks_ai"):
-                    pass
-                else:
-                    raise Exception(
-                        "Got provider={}, Expected provider={}, for model={}".format(
-                            _model_info["litellm_provider"],
-                            custom_llm_provider,
-                            model,
-                        )
-                    )
-        elif model in litellm.model_cost:
+                    _model_info = None
+        if _model_info is None and model in litellm.model_cost:
             key = model
-            _model_info = litellm.model_cost[model]
+            _model_info = _get_model_info_from_model_cost(key=key)
             _model_info["supported_openai_params"] = supported_openai_params
             if (
                 "litellm_provider" in _model_info
@@ -5125,10 +5060,50 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
                 ].startswith("fireworks_ai"):
                     pass
                 else:
-                    raise Exception
-        elif split_model in litellm.model_cost:
+                    _model_info = None
+        if (
+            _model_info is None
+            and combined_stripped_model_name in litellm.model_cost
+        ):
+            key = combined_stripped_model_name
+            _model_info = _get_model_info_from_model_cost(key=key)
+            _model_info["supported_openai_params"] = supported_openai_params
+            if (
+                "litellm_provider" in _model_info
+                and _model_info["litellm_provider"] != custom_llm_provider
+            ):
+                if custom_llm_provider == "vertex_ai" and _model_info[
+                    "litellm_provider"
+                ].startswith("vertex_ai"):
+                    pass
+                elif custom_llm_provider == "fireworks_ai" and _model_info[
+                    "litellm_provider"
+                ].startswith("fireworks_ai"):
+                    pass
+                else:
+                    _model_info = None
+        if _model_info is None and stripped_model_name in litellm.model_cost:
+            key = stripped_model_name
+            _model_info = _get_model_info_from_model_cost(key=key)
+            _model_info["supported_openai_params"] = supported_openai_params
+            if (
+                "litellm_provider" in _model_info
+                and _model_info["litellm_provider"] != custom_llm_provider
+            ):
+                if custom_llm_provider == "vertex_ai" and _model_info[
+                    "litellm_provider"
+                ].startswith("vertex_ai"):
+                    pass
+                elif custom_llm_provider == "fireworks_ai" and _model_info[
+                    "litellm_provider"
+                ].startswith("fireworks_ai"):
+                    pass
+                else:
+                    _model_info = None
+        if _model_info is None and split_model in litellm.model_cost:
             key = split_model
-            _model_info = litellm.model_cost[split_model]
+            _model_info = _get_model_info_from_model_cost(key=key)
             _model_info["supported_openai_params"] = supported_openai_params
             if (
                 "litellm_provider" in _model_info
@@ -5143,8 +5118,8 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
                 ].startswith("fireworks_ai"):
                     pass
                 else:
-                    raise Exception
-        else:
+                    _model_info = None
+        if _model_info is None or key is None:
             raise ValueError(
                 "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
             )
@@ -5212,7 +5187,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
             litellm_provider=_model_info.get(
                 "litellm_provider", custom_llm_provider
             ),
-            mode=_model_info.get("mode"),
+            mode=_model_info.get("mode"),  # type: ignore
             supported_openai_params=supported_openai_params,
             supports_system_messages=_model_info.get(
                 "supports_system_messages", None
@@ -9260,10 +9235,6 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> dict:
             processed_headers[k] = v
         else:
             additional_headers["{}-{}".format("llm_provider", k)] = v
-    ## GUARANTEE OPENAI HEADERS IN RESPONSE
-    for item in OPENAI_RESPONSE_HEADERS:
-        if item not in openai_headers:
-            openai_headers[item] = None
-
     additional_headers = {
         **openai_headers,
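A hedged usage sketch of the reworked lookup (model string is illustrative): exact "provider/model" keys win, then the exact model name, then stripped variants, and a provider mismatch now falls through to the next candidate instead of raising:

import litellm

# 'gemini/gemini-1.5-flash-001' is not in the map directly, so the lookup
# falls through to the stripped key 'gemini/gemini-1.5-flash'.
info = litellm.get_model_info(model="gemini/gemini-1.5-flash-001")
print(info["mode"], info["input_cost_per_token"])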

=== model_prices_and_context_window.json (second copy) ===

(This second, identical copy of model_prices_and_context_window.json receives exactly the same gemini-1.5-pro and gemini-1.5-flash pricing hunks shown above; the duplicated diff is omitted here.)
"input_cost_per_video_per_second": 0.0001315, "input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000125, "input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.000000125, "input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015, "input_cost_per_image_above_128k_tokens": 0.00004,
"output_cost_per_character": 0.000000375, "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"output_cost_per_token_above_128k_tokens": 0.000003, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_character_above_128k_tokens": 0.00000075, "output_cost_per_token": 0.0000000046875,
"output_cost_per_image": 0.000263, "output_cost_per_character": 0.00000001875,
"output_cost_per_video_per_second": 0.000263, "output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_audio_per_second": 0.00025, "output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat", "mode": "chat",
"supports_system_messages": true, "supports_system_messages": true,
@ -2299,20 +2299,20 @@
"max_audio_length_hours": 8.4, "max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1, "max_audio_per_prompt": 1,
"max_pdf_size_mb": 30, "max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315, "input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.0001315, "input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000125, "input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.000000125, "input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015, "input_cost_per_image_above_128k_tokens": 0.00004,
"output_cost_per_character": 0.000000375, "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"output_cost_per_token_above_128k_tokens": 0.000003, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_character_above_128k_tokens": 0.00000075, "output_cost_per_token": 0.0000000046875,
"output_cost_per_image": 0.000263, "output_cost_per_character": 0.00000001875,
"output_cost_per_video_per_second": 0.000263, "output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_audio_per_second": 0.00025, "output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat", "mode": "chat",
"supports_system_messages": true, "supports_system_messages": true,
@ -2331,20 +2331,20 @@
"max_audio_length_hours": 8.4, "max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1, "max_audio_per_prompt": 1,
"max_pdf_size_mb": 30, "max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315, "input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.0001315, "input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000125, "input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.000000125, "input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015, "input_cost_per_image_above_128k_tokens": 0.00004,
"output_cost_per_character": 0.000000375, "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"output_cost_per_token_above_128k_tokens": 0.000003, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_character_above_128k_tokens": 0.00000075, "output_cost_per_token": 0.0000000046875,
"output_cost_per_image": 0.000263, "output_cost_per_character": 0.00000001875,
"output_cost_per_video_per_second": 0.000263, "output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_audio_per_second": 0.00025, "output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat", "mode": "chat",
"supports_system_messages": true, "supports_system_messages": true,
@ -2363,20 +2363,20 @@
"max_audio_length_hours": 8.4, "max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1, "max_audio_per_prompt": 1,
"max_pdf_size_mb": 30, "max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315, "input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.0001315, "input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000125, "input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.000000125, "input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015, "input_cost_per_image_above_128k_tokens": 0.00004,
"output_cost_per_character": 0.000000375, "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"output_cost_per_token_above_128k_tokens": 0.000003, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_character_above_128k_tokens": 0.00000075, "output_cost_per_token": 0.0000000046875,
"output_cost_per_image": 0.000263, "output_cost_per_character": 0.00000001875,
"output_cost_per_video_per_second": 0.000263, "output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_audio_per_second": 0.00025, "output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat", "mode": "chat",
"supports_system_messages": true, "supports_system_messages": true,
@ -2395,20 +2395,20 @@
"max_audio_length_hours": 8.4, "max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1, "max_audio_per_prompt": 1,
"max_pdf_size_mb": 30, "max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315, "input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.0001315, "input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000125, "input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.0000005, "input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.000000125, "input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015, "input_cost_per_image_above_128k_tokens": 0.00004,
"output_cost_per_character": 0.000000375, "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"output_cost_per_token_above_128k_tokens": 0.000003, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_character_above_128k_tokens": 0.00000075, "output_cost_per_token": 0.0000000046875,
"output_cost_per_image": 0.000263, "output_cost_per_character": 0.00000001875,
"output_cost_per_video_per_second": 0.000263, "output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_audio_per_second": 0.00025, "output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models", "litellm_provider": "vertex_ai-language-models",
"mode": "chat", "mode": "chat",
"supports_system_messages": true, "supports_system_messages": true,
@ -664,9 +664,39 @@ def test_unmapped_gemini_model_params():
assert optional_params["stop_sequences"] == ["stop_word"] assert optional_params["stop_sequences"] == ["stop_word"]
def test_drop_nested_params_vllm(): def _check_additional_properties(schema):
    if isinstance(schema, dict):
        # Fail loudly if 'additionalProperties' or 'strict' survived anywhere
        if "additionalProperties" in schema or "strict" in schema:
            raise ValueError(
                "additionalProperties and strict should not be in the schema"
            )
        # Recursively process all dictionary values
        for key, value in schema.items():
            _check_additional_properties(value)
    elif isinstance(schema, list):
        # Recursively process all items in the list
        for item in schema:
            _check_additional_properties(item)
    return schema
@pytest.mark.parametrize(
    "provider, model",
    [
        ("hosted_vllm", "my-vllm-model"),
        ("gemini", "gemini-1.5-pro"),
        ("vertex_ai", "gemini-1.5-pro"),
    ],
)
def test_drop_nested_params_add_prop_and_strict(provider, model):
""" """
Relevant issue - https://github.com/BerriAI/litellm/issues/5288 Relevant issue - https://github.com/BerriAI/litellm/issues/5288
Relevant issue - https://github.com/BerriAI/litellm/issues/6136
""" """
tools = [ tools = [
{ {
@ -690,8 +720,8 @@ def test_drop_nested_params_vllm():
] ]
tool_choice = {"type": "function", "function": {"name": "structure_output"}} tool_choice = {"type": "function", "function": {"name": "structure_output"}}
optional_params = get_optional_params( optional_params = get_optional_params(
model="my-vllm-model", model=model,
custom_llm_provider="hosted_vllm", custom_llm_provider=provider,
temperature=0.2, temperature=0.2,
tools=tools, tools=tools,
tool_choice=tool_choice, tool_choice=tool_choice,
@ -700,7 +730,5 @@ def test_drop_nested_params_vllm():
["tools", "function", "additionalProperties"], ["tools", "function", "additionalProperties"],
], ],
) )
print(optional_params["tools"][0]["function"])
assert "additionalProperties" not in optional_params["tools"][0]["function"] _check_additional_properties(optional_params["tools"])
assert "strict" not in optional_params["tools"][0]["function"]
@ -0,0 +1,83 @@
import json
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import io
from unittest.mock import AsyncMock, MagicMock, patch

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm


def test_completion_pydantic_obj_2():
    from pydantic import BaseModel
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    litellm.set_verbose = True

    class CalendarEvent(BaseModel):
        name: str
        date: str
        participants: list[str]

    class EventsList(BaseModel):
        events: list[CalendarEvent]

    messages = [
        {"role": "user", "content": "List important events from the 20th century."}
    ]
    expected_request_body = {
        "contents": [
            {
                "role": "user",
                "parts": [{"text": "List important events from the 20th century."}],
            }
        ],
        "generationConfig": {
            "response_mime_type": "application/json",
            "response_schema": {
                "properties": {
                    "events": {
                        "items": {
                            "properties": {
                                "name": {"type": "string"},
                                "date": {"type": "string"},
                                "participants": {
                                    "items": {"type": "string"},
                                    "type": "array",
                                },
                            },
                            "type": "object",
                        },
                        "type": "array",
                    }
                },
                "type": "object",
            },
        },
    }
    client = HTTPHandler()
    with patch.object(client, "post", new=MagicMock()) as mock_post:
        mock_post.return_value = expected_request_body
        try:
            litellm.completion(
                model="gemini/gemini-1.5-pro",
                messages=messages,
                response_format=EventsList,
                client=client,
            )
        except Exception as e:
            print(e)
        mock_post.assert_called_once()
        print(mock_post.call_args.kwargs)
        assert mock_post.call_args.kwargs["json"] == expected_request_body
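For comparison, Pydantic's raw schema for these models still contains 'title' entries and '$defs'/'$ref' indirection; the expected body asserted above suggests those are normalized away before the request is sent. A quick way to inspect the starting point (standard Pydantic v2 API):

from pydantic import BaseModel

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

class EventsList(BaseModel):
    events: list[CalendarEvent]

# Raw schema: includes 'title' and '$defs'/'$ref', unlike the request body above.
print(EventsList.model_json_schema())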
@ -2209,3 +2209,28 @@ async def test_redis_proxy_batch_redis_get_cache():
print(response._hidden_params) print(response._hidden_params)
assert "cache_key" in response._hidden_params assert "cache_key" in response._hidden_params
def test_logging_turn_off_message_logging_streaming():
    litellm.turn_off_message_logging = True
    mock_obj = Cache(type="local")
    litellm.cache = mock_obj

    with patch.object(mock_obj, "add_cache", new=MagicMock()) as mock_client:
        print(f"mock_obj.add_cache: {mock_obj.add_cache}")

        resp = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="hello",
            stream=True,
        )

        for chunk in resp:
            continue

        time.sleep(1)
        mock_client.assert_called_once()
        assert mock_client.call_args.args[0].choices[0].message.content == "hello"
@ -1711,31 +1711,6 @@ def test_completion_perplexity_api():
# test_completion_perplexity_api() # test_completion_perplexity_api()
@pytest.mark.skip(
    reason="too many requests. Hitting gemini rate limits. Convert to mock test."
)
def test_completion_pydantic_obj_2():
    from pydantic import BaseModel

    litellm.set_verbose = True

    class CalendarEvent(BaseModel):
        name: str
        date: str
        participants: list[str]

    class EventsList(BaseModel):
        events: list[CalendarEvent]

    messages = [
        {"role": "user", "content": "List important events from the 20th century."}
    ]
    response = litellm.completion(
        model="gemini/gemini-1.5-pro", messages=messages, response_format=EventsList
    )
@pytest.mark.skip(reason="this test is flaky") @pytest.mark.skip(reason="this test is flaky")
def test_completion_perplexity_api_2(): def test_completion_perplexity_api_2():
try: try:
@ -4573,12 +4548,7 @@ async def test_completion_ai21_chat():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model", "model",
[ ["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
"gpt-4o",
"azure/chatgpt-v-2",
"claude-3-sonnet-20240229",
"fireworks_ai/mixtral-8x7b-instruct",
],
) )
@pytest.mark.parametrize( @pytest.mark.parametrize(
"stream", "stream",
@ -4594,5 +4564,7 @@ def test_completion_response_ratelimit_headers(model, stream):
additional_headers = hidden_params.get("additional_headers", {}) additional_headers = hidden_params.get("additional_headers", {})
print(additional_headers) print(additional_headers)
    for k, v in additional_headers.items():
        assert v != "None" and v is not None
assert "x-ratelimit-remaining-requests" in additional_headers assert "x-ratelimit-remaining-requests" in additional_headers
assert "x-ratelimit-remaining-tokens" in additional_headers assert "x-ratelimit-remaining-tokens" in additional_headers
@ -2359,3 +2359,131 @@ def test_together_ai_embedding_completion_cost():
custom_llm_provider="together_ai", custom_llm_provider="together_ai",
call_type="embedding", call_type="embedding",
) )
def test_completion_cost_params():
    """
    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
    """
    litellm.set_verbose = True
    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
        model="gemini-1.5-pro-002",
        prompt_tokens=1000,
        completion_tokens=1000,
        custom_llm_provider="vertex_ai_beta",
    )

    resp2_prompt_cost, resp2_completion_cost = cost_per_token(
        model="gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
    )

    assert resp2_prompt_cost > 0
    assert resp1_prompt_cost == resp2_prompt_cost
    assert resp1_completion_cost == resp2_completion_cost

    resp3_prompt_cost, resp3_completion_cost = cost_per_token(
        model="vertex_ai/gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
    )

    assert resp3_prompt_cost > 0
    assert resp3_prompt_cost == resp1_prompt_cost
    assert resp3_completion_cost == resp1_completion_cost
def test_completion_cost_params_2():
    """
    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
    """
    litellm.set_verbose = True

    prompt_characters = 1000
    completion_characters = 1000
    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
        model="gemini-1.5-pro-002",
        prompt_characters=prompt_characters,
        completion_characters=completion_characters,
        prompt_tokens=1000,
        completion_tokens=1000,
    )

    print(resp1_prompt_cost, resp1_completion_cost)

    model_info = litellm.get_model_info("gemini-1.5-pro-002")
    input_cost_per_character = model_info["input_cost_per_character"]
    output_cost_per_character = model_info["output_cost_per_character"]

    assert resp1_prompt_cost == input_cost_per_character * prompt_characters
    assert resp1_completion_cost == output_cost_per_character * completion_characters
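# Worked numbers for the assertions above, assuming gemini-1.5-pro-002 carries
# the updated per-character rates from this commit's pricing diff
# (input 0.0000003125, output 0.00000125 USD per character):
#   resp1_prompt_cost     == 1000 * 0.0000003125 == 0.0003125
#   resp1_completion_cost == 1000 * 0.00000125   == 0.00125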
def test_completion_cost_params_gemini_3():
    from litellm.utils import Choices, Message, ModelResponse, Usage
    from litellm.litellm_core_utils.llm_cost_calc.google import cost_per_character

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    response = ModelResponse(
        id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
        choices=[
            Choices(
                finish_reason="stop",
                index=0,
                message=Message(
                    content="Sí. \n",
                    role="assistant",
                    tool_calls=None,
                    function_call=None,
                ),
            )
        ],
        created=1728529259,
        model="gemini-1.5-flash",
        object="chat.completion",
        system_fingerprint=None,
        usage=Usage(
            completion_tokens=2,
            prompt_tokens=3771,
            total_tokens=3773,
            completion_tokens_details=None,
            prompt_tokens_details=None,
        ),
        vertex_ai_grounding_metadata=[],
        vertex_ai_safety_results=[
            [
                {
                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "probability": "NEGLIGIBLE",
                },
                {"category": "HARM_CATEGORY_HATE_SPEECH", "probability": "NEGLIGIBLE"},
                {"category": "HARM_CATEGORY_HARASSMENT", "probability": "NEGLIGIBLE"},
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "probability": "NEGLIGIBLE",
                },
            ]
        ],
        vertex_ai_citation_metadata=[],
    )

    pc, cc = cost_per_character(
        **{
            "model": "gemini-1.5-flash",
            "custom_llm_provider": "vertex_ai",
            "prompt_tokens": 3771,
            "completion_tokens": 2,
            "prompt_characters": None,
            "completion_characters": 3,
        }
    )

    model_info = litellm.get_model_info("gemini-1.5-flash")

    assert round(pc, 10) == round(3771 * model_info["input_cost_per_token"], 10)
    assert round(cc, 10) == round(
        3 * model_info["output_cost_per_character"],
        10,
    )
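To make the final assertions concrete, here is the arithmetic under the assumption that the local cost map picks up the updated gemini-1.5-flash rates from this commit's pricing diff:

# Hedged arithmetic check, assuming the updated gemini-1.5-flash rates above.
input_cost_per_token = 0.000000004688
output_cost_per_character = 0.00000001875

pc = 3771 * input_cost_per_token      # ≈ 1.77e-05 USD for 3771 prompt tokens
cc = 3 * output_cost_per_character    # ≈ 5.6e-08 USD for 3 completion characters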
@ -1414,6 +1414,7 @@ def test_logging_standard_payload_llm_headers(stream):
with patch.object( with patch.object(
customHandler, "log_success_event", new=MagicMock() customHandler, "log_success_event", new=MagicMock()
) as mock_client: ) as mock_client:
resp = litellm.completion( resp = litellm.completion(
model="gpt-3.5-turbo", model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}], messages=[{"role": "user", "content": "Hey, how's it going?"}],
@ -68,3 +68,9 @@ def test_get_model_info_finetuned_models():
info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id") info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
print("info", info) print("info", info)
assert info["input_cost_per_token"] == 0.000003 assert info["input_cost_per_token"] == 0.000003
def test_get_model_info_gemini_pro():
    info = litellm.get_model_info("gemini-1.5-pro-002")
    print("info", info)
    assert info["key"] == "gemini-1.5-pro-002"