forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/09/2024) (#6139)
* fix(utils.py): don't return 'none' response headers
  Fixes https://github.com/BerriAI/litellm/issues/6123
* fix(vertex_and_google_ai_studio_gemini.py): support parsing out additional properties and strict value for tool calls
  Fixes https://github.com/BerriAI/litellm/issues/6136
* fix(cost_calculator.py): set default character value to none
  Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403290196
* fix(google.py): fix cost per token / cost per char conversion
  Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403370287
* build(model_prices_and_context_window.json): update gemini pricing
  Fixes https://github.com/BerriAI/litellm/issues/6133
* build(model_prices_and_context_window.json): update gemini pricing
* fix(litellm_logging.py): fix streaming caching logging when 'turn_off_message_logging' enabled
  Stores unredacted response in cache
* build(model_prices_and_context_window.json): update gemini-1.5-flash pricing
* fix(cost_calculator.py): fix default prompt_character count logic
  Fixes error in gemini cost calculation
* fix(cost_calculator.py): fix cost calc for tts models
This commit is contained in:
parent 60baa65e0e
commit 6005450c8f

16 changed files with 788 additions and 534 deletions
cost_calculator.py
@@ -87,8 +87,8 @@ def cost_per_token(
     custom_llm_provider: Optional[str] = None,
     region_name=None,
     ### CHARACTER PRICING ###
-    prompt_characters: int = 0,
-    completion_characters: int = 0,
+    prompt_characters: Optional[int] = None,
+    completion_characters: Optional[int] = None,
     ### PROMPT CACHING PRICING ### - used for anthropic
     cache_creation_input_tokens: Optional[int] = 0,
     cache_read_input_tokens: Optional[int] = 0,
@@ -201,13 +201,24 @@ def cost_per_token(
         model = model_without_prefix

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
-    print_verbose(f"Looking up model={model} in model_cost_map")
+    print_verbose(
+        f"Looking up model={model} in model_cost_map, custom_llm_provider={custom_llm_provider}, call_type={call_type}"
+    )
     if call_type == "speech" or call_type == "aspeech":
+        if prompt_characters is None:
+            raise ValueError(
+                "prompt_characters must be provided for tts calls. prompt_characters={}, model={}, custom_llm_provider={}, call_type={}".format(
+                    prompt_characters,
+                    model,
+                    custom_llm_provider,
+                    call_type,
+                )
+            )
         prompt_cost, completion_cost = _generic_cost_per_character(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
             prompt_characters=prompt_characters,
-            completion_characters=completion_characters,
+            completion_characters=0,
             custom_prompt_cost=None,
             custom_completion_cost=0,
         )
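Aside: taken together, the two cost_calculator.py hunks above make the input character count mandatory for text-to-speech pricing and pin output characters to zero (TTS returns audio, not text). A minimal sketch of the resulting behavior, using a made-up per-character rate in place of the real model-map lookup:

from typing import Optional, Tuple

def tts_cost(
    prompt_characters: Optional[int],
    input_cost_per_character: float,
) -> Tuple[float, float]:
    # mirrors the new guard: a character count is now required for TTS
    if prompt_characters is None:
        raise ValueError("prompt_characters must be provided for tts calls")
    # TTS output is audio, so completion characters are pinned to 0
    return (prompt_characters * input_cost_per_character, 0.0)

# usage: 1,000 input characters at a hypothetical $0.000016/char
prompt_cost, completion_cost = tts_cost(1000, 16e-6)
assert completion_cost == 0.0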
@@ -232,10 +243,6 @@ def cost_per_token(
         cost_router = google_cost_router(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
-            prompt_characters=prompt_characters,
-            completion_characters=completion_characters,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
             call_type=call_type,
         )
         if cost_router == "cost_per_character":
@@ -542,9 +549,9 @@ def completion_cost(
             model = "dall-e-2"  # for dall-e-2, azure expects an empty model name
         # Handle Inputs to completion_cost
         prompt_tokens = 0
-        prompt_characters = 0
+        prompt_characters: Optional[int] = None
         completion_tokens = 0
-        completion_characters = 0
+        completion_characters: Optional[int] = None
         cache_creation_input_tokens: Optional[int] = None
         cache_read_input_tokens: Optional[int] = None
         if completion_response is not None and (
@@ -721,10 +728,8 @@ def completion_cost(
                 prompt_string = litellm.utils.get_formatted_prompt(
                     data={"messages": messages}, call_type="completion"
                 )
-            else:
-                prompt_string = ""

             prompt_characters = litellm.utils._count_characters(text=prompt_string)
         if completion_response is not None and isinstance(
             completion_response, ModelResponse
         ):
litellm_logging.py
@@ -901,7 +901,9 @@ class Logging:
                     complete_streaming_response = None
             else:
                 self.sync_streaming_chunks.append(result)

+        _caching_complete_streaming_response: Optional[
+            Union[ModelResponse, TextCompletionResponse]
+        ] = None
         if complete_streaming_response is not None:
             verbose_logger.debug(
                 "Logging Details LiteLLM-Success Call streaming complete"
@@ -909,6 +911,9 @@ class Logging:
             self.model_call_details["complete_streaming_response"] = (
                 complete_streaming_response
             )
+            _caching_complete_streaming_response = copy.deepcopy(
+                complete_streaming_response
+            )
             self.model_call_details["response_cost"] = (
                 self._response_cost_calculator(result=complete_streaming_response)
             )
@@ -937,6 +942,20 @@ class Logging:
                 else:
                     callbacks = litellm.success_callback

+                ## STREAMING CACHING ##
+                if "cache" in callbacks and litellm.cache is not None:
+                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+                    print_verbose("success_callback: reaches cache for logging!")
+                    kwargs = self.model_call_details
+                    if self.stream and _caching_complete_streaming_response is not None:
+                        print_verbose(
+                            "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
+                        )
+                        result = _caching_complete_streaming_response
+                        # only add to cache once we have a complete streaming response
+                        litellm.cache.add_cache(result, **kwargs)
+
+                ## REDACT MESSAGES ##
                 result = redact_message_input_output_from_logging(
                     model_call_details=(
                         self.model_call_details
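Aside: the point of _caching_complete_streaming_response is ordering. The cache write now happens before redact_message_input_output_from_logging runs, on a deepcopy taken as the stream completes, so enabling 'turn_off_message_logging' no longer stores a redacted response in the cache. A simplified sketch of that ordering (the names and the redact callable here are stand-ins, not the real signatures):

import copy

def handle_stream_success(complete_response, cache, callbacks, redact):
    # copy first: the cache must keep the unredacted content
    caching_copy = copy.deepcopy(complete_response)
    if "cache" in callbacks and cache is not None:
        cache.add_cache(caching_copy)
    # redaction (turn_off_message_logging) only affects what callbacks see
    return redact(complete_response)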
@@ -1302,23 +1321,6 @@ class Logging:
                         end_time=end_time,
                         print_verbose=print_verbose,
                     )
-                if callback == "cache" and litellm.cache is not None:
-                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                    print_verbose("success_callback: reaches cache for logging!")
-                    kwargs = self.model_call_details
-                    if self.stream:
-                        if "complete_streaming_response" not in kwargs:
-                            print_verbose(
-                                f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
-                            )
-                            pass
-                        else:
-                            print_verbose(
-                                "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
-                            )
-                            result = kwargs["complete_streaming_response"]
-                            # only add to cache once we have a complete streaming response
-                            litellm.cache.add_cache(result, **kwargs)
                 if callback == "athina" and athinaLogger is not None:
                     deep_copy = {}
                     for k, v in self.model_call_details.items():
litellm/litellm_core_utils/llm_cost_calc/google.py
@@ -32,10 +32,6 @@ def _is_above_128k(tokens: float) -> bool:
 def cost_router(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
-    prompt_characters: float,
-    completion_characters: float,
     call_type: Union[Literal["embedding", "aembedding"], str],
 ) -> Literal["cost_per_character", "cost_per_token"]:
     """
@@ -66,8 +62,8 @@ def cost_per_character(
     custom_llm_provider: str,
     prompt_tokens: float,
     completion_tokens: float,
-    prompt_characters: float,
-    completion_characters: float,
+    prompt_characters: Optional[float] = None,
+    completion_characters: Optional[float] = None,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per character for a given VertexAI model, input messages, and response object.
@@ -94,87 +90,100 @@ def cost_per_character(
         )

     ## CALCULATE INPUT COST
-    try:
-        if (
-            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
-            and model not in models_without_dynamic_pricing
-        ):
-            ## check if character pricing, else default to token pricing
-            assert (
-                "input_cost_per_character_above_128k_tokens" in model_info
-                and model_info["input_cost_per_character_above_128k_tokens"] is not None
-            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
-                model, model_info
-            )
-            prompt_cost = (
-                prompt_characters
-                * model_info["input_cost_per_character_above_128k_tokens"]
-            )
-        else:
-            assert (
-                "input_cost_per_character" in model_info
-                and model_info["input_cost_per_character"] is not None
-            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
-    except Exception as e:
-        verbose_logger.exception(
-            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Defaulting to (cost_per_token * 4) calculation for prompt_cost. Exception occured - {}".format(
-                str(e)
-            )
-        )
-        initial_prompt_cost, _ = cost_per_token(
+    if prompt_characters is None:
+        prompt_cost, _ = cost_per_token(
             model=model,
             custom_llm_provider=custom_llm_provider,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
-        prompt_cost = initial_prompt_cost * 4
+    else:
+        try:
+            if (
+                _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
+                and model not in models_without_dynamic_pricing
+            ):
+                ## check if character pricing, else default to token pricing
+                assert (
+                    "input_cost_per_character_above_128k_tokens" in model_info
+                    and model_info["input_cost_per_character_above_128k_tokens"]
+                    is not None
+                ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
+                    model, model_info
+                )
+                prompt_cost = (
+                    prompt_characters
+                    * model_info["input_cost_per_character_above_128k_tokens"]
+                )
+            else:
+                assert (
+                    "input_cost_per_character" in model_info
+                    and model_info["input_cost_per_character"] is not None
+                ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                prompt_cost = prompt_characters * model_info["input_cost_per_character"]
+        except Exception as e:
+            verbose_logger.debug(
+                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format(
+                    str(e)
+                )
+            )
+            prompt_cost, _ = cost_per_token(
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )

     ## CALCULATE OUTPUT COST
-    try:
-        if (
-            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
-            and model not in models_without_dynamic_pricing
-        ):
-            assert (
-                "output_cost_per_character_above_128k_tokens" in model_info
-                and model_info["output_cost_per_character_above_128k_tokens"]
-                is not None
-            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            completion_cost = (
-                completion_tokens
-                * model_info["output_cost_per_character_above_128k_tokens"]
-            )
-        else:
-            assert (
-                "output_cost_per_character" in model_info
-                and model_info["output_cost_per_character"] is not None
-            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
-                model, model_info
-            )
-            completion_cost = (
-                completion_tokens * model_info["output_cost_per_character"]
-            )
-    except Exception as e:
-        verbose_logger.exception(
-            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): \
-                Defaulting to (cost_per_token * 4) calculation for completion_cost\nException occured - {}".format(
-                str(e)
-            )
-        )
-        _, initial_completion_cost = cost_per_token(
+    if completion_characters is None:
+        _, completion_cost = cost_per_token(
             model=model,
             custom_llm_provider=custom_llm_provider,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
-        completion_cost = initial_completion_cost * 4
+    else:
+        try:
+            if (
+                _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
+                and model not in models_without_dynamic_pricing
+            ):
+                assert (
+                    "output_cost_per_character_above_128k_tokens" in model_info
+                    and model_info["output_cost_per_character_above_128k_tokens"]
+                    is not None
+                ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                completion_cost = (
+                    completion_tokens
+                    * model_info["output_cost_per_character_above_128k_tokens"]
+                )
+            else:
+                assert (
+                    "output_cost_per_character" in model_info
+                    and model_info["output_cost_per_character"] is not None
+                ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                    model, model_info
+                )
+                completion_cost = (
+                    completion_characters * model_info["output_cost_per_character"]
+                )
+        except Exception as e:
+            verbose_logger.debug(
+                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occured - {}\nDefaulting to None".format(
+                    str(e)
+                )
+            )
+            _, completion_cost = cost_per_token(
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )

     return prompt_cost, completion_cost
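Aside: the restructured function asks "do I have a character count at all?" before doing arithmetic on it. The old code computed prompt_characters * 4 first, which raises a TypeError once the default became None, and its exception fallback multiplied the token cost by 4, inflating the result. A condensed sketch of the new control flow, with simplified pricing inputs standing in for the real model_info lookup:

from typing import Optional

def prompt_cost_usd(
    prompt_characters: Optional[float],
    prompt_tokens: float,
    cost_per_character: Optional[float],
    cost_per_token: float,
) -> float:
    if prompt_characters is None:
        # old code crashed here computing prompt_characters * 4
        return prompt_tokens * cost_per_token
    try:
        if cost_per_character is None:
            raise AssertionError("no character pricing in model_info")
        return prompt_characters * cost_per_character
    except Exception:
        # mirrors the new fallback: quietly revert to token pricing
        return prompt_tokens * cost_per_token

assert prompt_cost_usd(None, 100, None, 2e-6) == 100 * 2e-6
assert prompt_cost_usd(400, 100, 5e-7, 2e-6) == 400 * 5e-7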
@@ -17,9 +17,8 @@ def _generic_cost_per_character(
     custom_completion_cost: Optional[float],
 ) -> Tuple[Optional[float], Optional[float]]:
     """
-    Generic function to help calculate cost per character.
-    """
-
-    """
+    Calculates cost per character for aspeech/speech calls.
     Calculates the cost per character for a given model, input messages, and response object.

     Input:
@@ -29,7 +28,7 @@ def _generic_cost_per_character(
     - completion_characters: float, the number of output characters

     Returns:
     Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
     - returns None if not able to calculate cost.

     Raises:
vertex_and_google_ai_studio_gemini.py
@@ -7,6 +7,7 @@ import os
 import time
 import types
 import uuid
+from copy import deepcopy
 from enum import Enum
 from functools import partial
 from typing import (
@@ -65,9 +66,11 @@ from litellm.types.llms.vertex_ai import (
 from litellm.types.utils import GenericStreamingChunk
 from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

+from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...base import BaseLLM
 from ..common_utils import (
     VertexAIError,
+    _build_vertex_schema,
     _get_gemini_url,
     _get_vertex_url,
     all_gemini_url_modes,
@@ -376,7 +379,10 @@ class VertexGeminiConfig:
     def _map_function(self, value: List[dict]) -> List[Tools]:
         gtool_func_declarations = []
         googleSearchRetrieval: Optional[dict] = None
+        # remove 'additionalProperties' from tools
+        value = _remove_additional_properties(value)
+        # remove 'strict' from tools
+        value = _remove_strict_from_schema(value)
         for tool in value:
             openai_function_object: Optional[ChatCompletionToolParamFunctionChunk] = (
                 None
@@ -437,6 +443,10 @@ class VertexGeminiConfig:
             if param == "max_tokens" or param == "max_completion_tokens":
                 optional_params["max_output_tokens"] = value
             if param == "response_format" and isinstance(value, dict):  # type: ignore
+                # remove 'additionalProperties' from json schema
+                value = _remove_additional_properties(value)
+                # remove 'strict' from json schema
+                value = _remove_strict_from_schema(value)
                 if value["type"] == "json_object":
                     optional_params["response_mime_type"] = "application/json"
                 elif value["type"] == "text":
@@ -448,6 +458,19 @@ class VertexGeminiConfig:
                 if "json_schema" in value and "schema" in value["json_schema"]:  # type: ignore
                     optional_params["response_mime_type"] = "application/json"
                     optional_params["response_schema"] = value["json_schema"]["schema"]  # type: ignore
+
+                if "response_schema" in optional_params and isinstance(
+                    optional_params["response_schema"], dict
+                ):
+                    old_schema = deepcopy(optional_params["response_schema"])
+
+                    if isinstance(old_schema, list):
+                        for item in old_schema:
+                            if isinstance(item, dict):
+                                item = _build_vertex_schema(parameters=item)
+                    elif isinstance(old_schema, dict):
+                        old_schema = _build_vertex_schema(parameters=old_schema)
+                    optional_params["response_schema"] = old_schema
             if param == "frequency_penalty":
                 optional_params["frequency_penalty"] = value
             if param == "presence_penalty":
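Aside: with these hunks, both tool definitions and response_format JSON schemas are scrubbed of the OpenAI-only 'additionalProperties' and 'strict' keys before being converted for Vertex. One detail worth flagging: in the list branch above, item = _build_vertex_schema(parameters=item) rebinds the loop variable, so the rewrite only takes effect if _build_vertex_schema mutates the dict in place. A sketch of the normalization step that avoids relying on that; _vertex_schema is a stand-in for the imported helper, whose body this diff does not show:

from copy import deepcopy
from typing import Any

def _vertex_schema(parameters: dict) -> dict:
    # stand-in for common_utils._build_vertex_schema
    return parameters

def normalize_response_schema(schema: Any) -> Any:
    old_schema = deepcopy(schema)
    if isinstance(old_schema, list):
        # build a new list instead of rebinding the loop variable
        old_schema = [
            _vertex_schema(parameters=item) if isinstance(item, dict) else item
            for item in old_schema
        ]
    elif isinstance(old_schema, dict):
        old_schema = _vertex_schema(parameters=old_schema)
    return old_schema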
litellm/model_prices_and_context_window_backup.json
@@ -2106,20 +2106,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
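Aside: a quick sanity check on the updated rates. Under the new per-character prices in the hunk above, a 1,000-character prompt and a 2,000-character reply cost:

INPUT_COST_PER_CHARACTER = 0.0000003125   # from the + side above
OUTPUT_COST_PER_CHARACTER = 0.00000125

prompt_cost = 1_000 * INPUT_COST_PER_CHARACTER       # $0.0003125
completion_cost = 2_000 * OUTPUT_COST_PER_CHARACTER  # $0.0025
print(prompt_cost + completion_cost)                 # 0.0028125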
@@ -2132,20 +2132,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2158,20 +2158,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2184,20 +2184,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2210,20 +2210,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2236,20 +2236,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
@@ -2267,20 +2267,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2299,20 +2299,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2331,20 +2331,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2363,20 +2363,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2395,20 +2395,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -6,5 +6,7 @@ model_list:
       api_base: os.environ/AZURE_API_BASE

 litellm_settings:
-  success_callback: ["langfuse"]
-  max_internal_user_budget: 10
+  turn_off_message_logging: true
+  cache: True
+  cache_params:
+    type: local
litellm/utils.py: 165 lines changed
@@ -2771,6 +2771,11 @@ def get_optional_params_embeddings(


 def _remove_additional_properties(schema):
+    """
+    clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
+
+    Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088
+    """
     if isinstance(schema, dict):
         # Remove the 'additionalProperties' key if it exists and is set to False
         if "additionalProperties" in schema and schema["additionalProperties"] is False:
@@ -2789,6 +2794,9 @@ def _remove_additional_properties(schema):


 def _remove_strict_from_schema(schema):
+    """
+    Relevant Issues: https://github.com/BerriAI/litellm/issues/6136, https://github.com/BerriAI/litellm/issues/6088
+    """
     if isinstance(schema, dict):
         # Remove the 'additionalProperties' key if it exists and is set to False
         if "strict" in schema:
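Aside: the two hunks above only show the new docstrings and the first line of each helper; the visible "if isinstance(schema, dict)" head suggests a recursive walk over nested schemas. A plausible shape, written from scratch for illustration rather than copied from the source:

def remove_strict(schema):
    # recursively drop the OpenAI-only 'strict' key from a JSON schema
    if isinstance(schema, dict):
        schema.pop("strict", None)
        for value in schema.values():
            remove_strict(value)
    elif isinstance(schema, list):
        for item in schema:
            remove_strict(item)
    return schema

cleaned = remove_strict(
    {"strict": True, "properties": {"a": {"strict": True, "type": "string"}}}
)
assert "strict" not in cleaned and "strict" not in cleaned["properties"]["a"]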
@@ -3000,37 +3008,6 @@ def get_optional_params(
             non_default_params["response_format"] = type_to_response_format_param(
                 response_format=non_default_params["response_format"]
             )
-            # # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
-            if (
-                non_default_params["response_format"] is not None
-                and non_default_params["response_format"]
-                .get("json_schema", {})
-                .get("schema")
-                is not None
-                and custom_llm_provider
-                in [
-                    "gemini",
-                    "vertex_ai",
-                    "vertex_ai_beta",
-                ]
-            ):
-                from litellm.llms.vertex_ai_and_google_ai_studio.common_utils import (
-                    _build_vertex_schema,
-                )
-
-                old_schema = copy.deepcopy(
-                    non_default_params["response_format"]
-                    .get("json_schema", {})
-                    .get("schema")
-                )
-                new_schema = _remove_additional_properties(schema=old_schema)
-                if isinstance(new_schema, list):
-                    for item in new_schema:
-                        if isinstance(item, dict):
-                            item = _build_vertex_schema(parameters=item)
-                elif isinstance(new_schema, dict):
-                    new_schema = _build_vertex_schema(parameters=new_schema)
-                non_default_params["response_format"]["json_schema"]["schema"] = new_schema
     if "tools" in non_default_params and isinstance(
         non_default_params, list
     ):  # fixes https://github.com/BerriAI/litellm/issues/4933
@@ -3197,7 +3174,7 @@ def get_optional_params(

         if stream:
             optional_params["stream"] = stream
-            #return optional_params
+            # return optional_params
         if max_tokens is not None:
             if "vicuna" in model or "flan" in model:
                 optional_params["max_length"] = max_tokens
@@ -4900,6 +4877,10 @@ def _strip_model_name(model: str) -> str:
     return strip_finetune


+def _get_model_info_from_model_cost(key: str) -> dict:
+    return litellm.model_cost[key]
+
+
 def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
     """
     Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model.
@@ -5041,14 +5022,16 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
     """
     Check if: (in order of specificity)
     1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq"
-    2. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
-    3. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
-    4. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None
+    2. 'model' in litellm.model_cost. Checks "gemini-1.5-pro-002" in litellm.model_cost if model="gemini-1.5-pro-002" and custom_llm_provider=None
+    3. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given.
+    4. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given.
     5. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192"
     """
+    _model_info: Optional[Dict[str, Any]] = None
+    key: Optional[str] = None
     if combined_model_name in litellm.model_cost:
         key = combined_model_name
-        _model_info = litellm.model_cost[combined_model_name]
+        _model_info = _get_model_info_from_model_cost(key=key)
         _model_info["supported_openai_params"] = supported_openai_params
         if (
             "litellm_provider" in _model_info
@@ -5059,58 +5042,10 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
         ].startswith("vertex_ai"):
             pass
         else:
-            raise Exception
-    elif combined_stripped_model_name in litellm.model_cost:
-        key = combined_stripped_model_name
-        _model_info = litellm.model_cost[combined_stripped_model_name]
-        _model_info["supported_openai_params"] = supported_openai_params
-        if (
-            "litellm_provider" in _model_info
-            and _model_info["litellm_provider"] != custom_llm_provider
-        ):
-            if custom_llm_provider == "vertex_ai" and _model_info[
-                "litellm_provider"
-            ].startswith("vertex_ai"):
-                pass
-            elif custom_llm_provider == "fireworks_ai" and _model_info[
-                "litellm_provider"
-            ].startswith("fireworks_ai"):
-                pass
-            else:
-                raise Exception(
-                    "Got provider={}, Expected provider={}, for model={}".format(
-                        _model_info["litellm_provider"],
-                        custom_llm_provider,
-                        model,
-                    )
-                )
-    elif stripped_model_name in litellm.model_cost:
-        key = stripped_model_name
-        _model_info = litellm.model_cost[stripped_model_name]
-        _model_info["supported_openai_params"] = supported_openai_params
-        if (
-            "litellm_provider" in _model_info
-            and _model_info["litellm_provider"] != custom_llm_provider
-        ):
-            if custom_llm_provider == "vertex_ai" and _model_info[
-                "litellm_provider"
-            ].startswith("vertex_ai"):
-                pass
-            elif custom_llm_provider == "fireworks_ai" and _model_info[
-                "litellm_provider"
-            ].startswith("fireworks_ai"):
-                pass
-            else:
-                raise Exception(
-                    "Got provider={}, Expected provider={}, for model={}".format(
-                        _model_info["litellm_provider"],
-                        custom_llm_provider,
-                        model,
-                    )
-                )
-    elif model in litellm.model_cost:
+            _model_info = None
+    if _model_info is None and model in litellm.model_cost:
         key = model
-        _model_info = litellm.model_cost[model]
+        _model_info = _get_model_info_from_model_cost(key=key)
         _model_info["supported_openai_params"] = supported_openai_params
         if (
             "litellm_provider" in _model_info
@@ -5125,10 +5060,50 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
         ].startswith("fireworks_ai"):
             pass
         else:
-            raise Exception
-    elif split_model in litellm.model_cost:
+            _model_info = None
+    if (
+        _model_info is None
+        and combined_stripped_model_name in litellm.model_cost
+    ):
+        key = combined_stripped_model_name
+        _model_info = _get_model_info_from_model_cost(key=key)
+        _model_info["supported_openai_params"] = supported_openai_params
+        if (
+            "litellm_provider" in _model_info
+            and _model_info["litellm_provider"] != custom_llm_provider
+        ):
+            if custom_llm_provider == "vertex_ai" and _model_info[
+                "litellm_provider"
+            ].startswith("vertex_ai"):
+                pass
+            elif custom_llm_provider == "fireworks_ai" and _model_info[
+                "litellm_provider"
+            ].startswith("fireworks_ai"):
+                pass
+            else:
+                _model_info = None
+    if _model_info is None and stripped_model_name in litellm.model_cost:
+        key = stripped_model_name
+        _model_info = _get_model_info_from_model_cost(key=key)
+        _model_info["supported_openai_params"] = supported_openai_params
+        if (
+            "litellm_provider" in _model_info
+            and _model_info["litellm_provider"] != custom_llm_provider
+        ):
+            if custom_llm_provider == "vertex_ai" and _model_info[
+                "litellm_provider"
+            ].startswith("vertex_ai"):
+                pass
+            elif custom_llm_provider == "fireworks_ai" and _model_info[
+                "litellm_provider"
+            ].startswith("fireworks_ai"):
+                pass
+            else:
+                _model_info = None

+    if _model_info is None and split_model in litellm.model_cost:
         key = split_model
-        _model_info = litellm.model_cost[split_model]
+        _model_info = _get_model_info_from_model_cost(key=key)
         _model_info["supported_openai_params"] = supported_openai_params
         if (
             "litellm_provider" in _model_info
@ -5143,8 +5118,8 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
|
||||||
].startswith("fireworks_ai"):
|
].startswith("fireworks_ai"):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise Exception
|
_model_info = None
|
||||||
else:
|
if _model_info is None or key is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
|
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
|
||||||
)
|
)
|
||||||
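Taken together, the hunks above turn the lookup into a fall-through chain: each candidate name is tried only while `_model_info` is still `None`, and a provider-prefix mismatch now clears `_model_info` instead of raising mid-chain. A minimal standalone sketch of that pattern, with an illustrative `model_cost` dict and a hypothetical `provider_ok` helper standing in for the inline vertex_ai/fireworks_ai checks:

    from typing import Optional

    def provider_ok(entry: dict, provider: Optional[str]) -> bool:
        # Mirrors the vertex_ai / fireworks_ai special case: accept the entry when
        # its provider matches outright or is a sub-provider prefix match.
        entry_provider = entry.get("litellm_provider", "")
        return provider is None or entry_provider.startswith(provider)

    def lookup_model_info(model_cost: dict, candidates: list, provider: Optional[str]) -> Optional[dict]:
        info = None
        for name in candidates:
            if info is None and name in model_cost:
                entry = model_cost[name]
                # On a mismatch, fall through to the next candidate name
                # (the `_model_info = None` change) instead of raising.
                info = entry if provider_ok(entry, provider) else None
        return info

    model_cost = {"gemini-1.5-pro": {"litellm_provider": "vertex_ai-language-models"}}
    print(lookup_model_info(model_cost, ["vertex_ai/gemini-1.5-pro", "gemini-1.5-pro"], "vertex_ai"))

Only after every candidate has been tried does the final `_model_info is None or key is None` check raise the "model isn't mapped yet" ValueError.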
@@ -5212,7 +5187,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
             litellm_provider=_model_info.get(
                 "litellm_provider", custom_llm_provider
             ),
-            mode=_model_info.get("mode"),
+            mode=_model_info.get("mode"),  # type: ignore
             supported_openai_params=supported_openai_params,
             supports_system_messages=_model_info.get(
                 "supports_system_messages", None
@@ -9260,10 +9235,6 @@ def process_response_headers(response_headers: Union[httpx.Headers, dict]) -> dict:
             processed_headers[k] = v
         else:
             additional_headers["{}-{}".format("llm_provider", k)] = v
-    ## GUARANTEE OPENAI HEADERS IN RESPONSE
-    for item in OPENAI_RESPONSE_HEADERS:
-        if item not in openai_headers:
-            openai_headers[item] = None
-
     additional_headers = {
         **openai_headers,
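The deleted block had been back-filling every known OpenAI rate-limit header with a None value, which is how 'none' header values leaked into responses; with it gone, only headers the provider actually returned survive. A rough sketch of the surviving partition logic, with an illustrative two-entry OPENAI_RESPONSE_HEADERS list (the real constant lives in litellm and is longer):

    OPENAI_RESPONSE_HEADERS = [
        "x-ratelimit-remaining-requests",
        "x-ratelimit-remaining-tokens",
    ]

    def process_response_headers_sketch(response_headers: dict) -> dict:
        openai_headers = {}
        additional_headers = {}
        for k, v in response_headers.items():
            if k in OPENAI_RESPONSE_HEADERS:
                # Known OpenAI headers pass through under their own name.
                openai_headers[k] = v
            else:
                # Everything else is namespaced with an llm_provider- prefix.
                additional_headers["{}-{}".format("llm_provider", k)] = v
        # No None back-fill anymore: absent headers simply stay absent.
        return {**openai_headers, **additional_headers}

    print(process_response_headers_sketch({"x-ratelimit-remaining-requests": "99", "server": "uvicorn"}))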
@@ -2106,20 +2106,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2132,20 +2132,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 2097152,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2158,20 +2158,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2184,20 +2184,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2210,20 +2210,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2236,20 +2236,20 @@
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
         "max_output_tokens": 8192,
-        "input_cost_per_image": 0.001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_video_per_second": 0.001315,
-        "input_cost_per_token": 0.000005,
-        "input_cost_per_character": 0.00000125,
-        "input_cost_per_token_above_128k_tokens": 0.00001,
-        "input_cost_per_character_above_128k_tokens": 0.0000025,
-        "output_cost_per_token": 0.000015,
-        "output_cost_per_character": 0.00000375,
-        "output_cost_per_token_above_128k_tokens": 0.00003,
-        "output_cost_per_character_above_128k_tokens": 0.0000075,
-        "output_cost_per_image": 0.00263,
-        "output_cost_per_video_per_second": 0.00263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image": 0.00032875,
+        "input_cost_per_audio_per_second": 0.00003125,
+        "input_cost_per_video_per_second": 0.00032875,
+        "input_cost_per_token": 0.000000078125,
+        "input_cost_per_character": 0.0000003125,
+        "input_cost_per_image_above_128k_tokens": 0.0006575,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
+        "input_cost_per_token_above_128k_tokens": 0.00000015625,
+        "input_cost_per_character_above_128k_tokens": 0.000000625,
+        "output_cost_per_token": 0.0000003125,
+        "output_cost_per_character": 0.00000125,
+        "output_cost_per_token_above_128k_tokens": 0.000000625,
+        "output_cost_per_character_above_128k_tokens": 0.0000025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_function_calling": true,
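As a quick sanity check on the new gemini-1.5-pro numbers, character-based billing for a small request works out as follows (the character counts are made up for illustration; the rates are the <=128k-context values from the hunks above):

    input_cost_per_character = 0.0000003125     # new gemini-1.5-pro input rate
    output_cost_per_character = 0.00000125      # new gemini-1.5-pro output rate

    prompt_characters = 4_000
    completion_characters = 1_000

    prompt_cost = prompt_characters * input_cost_per_character            # 0.00125
    completion_cost = completion_characters * output_cost_per_character   # 0.00125
    print(f"total: ${prompt_cost + completion_cost:.6f}")                 # total: $0.002500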
@@ -2267,20 +2267,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2299,20 +2299,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2331,20 +2331,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2363,20 +2363,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
@@ -2395,20 +2395,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_image": 0.0001315,
-        "input_cost_per_video_per_second": 0.0001315,
-        "input_cost_per_audio_per_second": 0.000125,
-        "input_cost_per_token": 0.0000005,
-        "input_cost_per_character": 0.000000125,
+        "input_cost_per_image": 0.00002,
+        "input_cost_per_video_per_second": 0.00002,
+        "input_cost_per_audio_per_second": 0.000002,
+        "input_cost_per_token": 0.000000004688,
+        "input_cost_per_character": 0.00000001875,
         "input_cost_per_token_above_128k_tokens": 0.000001,
         "input_cost_per_character_above_128k_tokens": 0.00000025,
-        "output_cost_per_token": 0.0000015,
-        "output_cost_per_character": 0.000000375,
-        "output_cost_per_token_above_128k_tokens": 0.000003,
-        "output_cost_per_character_above_128k_tokens": 0.00000075,
-        "output_cost_per_image": 0.000263,
-        "output_cost_per_video_per_second": 0.000263,
-        "output_cost_per_audio_per_second": 0.00025,
+        "input_cost_per_image_above_128k_tokens": 0.00004,
+        "input_cost_per_video_per_second_above_128k_tokens": 0.00004,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
+        "output_cost_per_token": 0.0000000046875,
+        "output_cost_per_character": 0.00000001875,
+        "output_cost_per_token_above_128k_tokens": 0.000000009375,
+        "output_cost_per_character_above_128k_tokens": 0.0000000375,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
         "supports_system_messages": true,
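The *_above_128k_tokens keys added in these entries describe a second pricing tier for long contexts. A hedged sketch of how a tiered input charge could be computed from the new gemini-1.5-flash rates; applying the higher rate to the whole prompt once it crosses 128k tokens is an assumption here, not necessarily litellm's exact tier-split rule:

    def tiered_input_cost(prompt_characters: int, prompt_tokens: int) -> float:
        base_rate = 0.00000001875   # input_cost_per_character
        above_rate = 0.00000025     # input_cost_per_character_above_128k_tokens
        # Assumption: the higher rate applies once the prompt exceeds 128k tokens.
        rate = above_rate if prompt_tokens > 128_000 else base_rate
        return prompt_characters * rate

    print(tiered_input_cost(prompt_characters=1_000, prompt_tokens=250))        # 1.875e-05
    print(tiered_input_cost(prompt_characters=600_000, prompt_tokens=150_000))  # 0.15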
@@ -664,9 +664,39 @@ def test_unmapped_gemini_model_params():
     assert optional_params["stop_sequences"] == ["stop_word"]


-def test_drop_nested_params_vllm():
+def _check_additional_properties(schema):
+    if isinstance(schema, dict):
+        # Remove the 'additionalProperties' key if it exists and is set to False
+        if "additionalProperties" in schema or "strict" in schema:
+            raise ValueError(
+                "additionalProperties and strict should not be in the schema"
+            )
+
+        # Recursively process all dictionary values
+        for key, value in schema.items():
+            _check_additional_properties(value)
+
+    elif isinstance(schema, list):
+        # Recursively process all items in the list
+        for item in schema:
+            _check_additional_properties(item)
+
+    return schema
+
+
+@pytest.mark.parametrize(
+    "provider, model",
+    [
+        ("hosted_vllm", "my-vllm-model"),
+        ("gemini", "gemini-1.5-pro"),
+        ("vertex_ai", "gemini-1.5-pro"),
+    ],
+)
+def test_drop_nested_params_add_prop_and_strict(provider, model):
     """
     Relevant issue - https://github.com/BerriAI/litellm/issues/5288
+
+    Relevant issue - https://github.com/BerriAI/litellm/issues/6136
     """
     tools = [
         {
@@ -690,8 +720,8 @@ def test_drop_nested_params_vllm():
     ]
     tool_choice = {"type": "function", "function": {"name": "structure_output"}}
     optional_params = get_optional_params(
-        model="my-vllm-model",
-        custom_llm_provider="hosted_vllm",
+        model=model,
+        custom_llm_provider=provider,
         temperature=0.2,
         tools=tools,
         tool_choice=tool_choice,
@@ -700,7 +730,5 @@ def test_drop_nested_params_vllm():
         ["tools", "function", "additionalProperties"],
     ],
 )
-    print(optional_params["tools"][0]["function"])
-
-    assert "additionalProperties" not in optional_params["tools"][0]["function"]
-    assert "strict" not in optional_params["tools"][0]["function"]
+    _check_additional_properties(optional_params["tools"])
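Note that `_check_additional_properties` only verifies the two keys are gone; the actual removal happens inside `get_optional_params` via the nested-param drop list exercised above. For reference, a standalone sketch of the recursive removal being tested (`strip_keys` is a hypothetical helper, not litellm's implementation):

    def strip_keys(schema, banned=("additionalProperties", "strict")):
        # Recursively drop banned keys from a JSON-schema-like structure.
        if isinstance(schema, dict):
            return {k: strip_keys(v, banned) for k, v in schema.items() if k not in banned}
        if isinstance(schema, list):
            return [strip_keys(item, banned) for item in schema]
        return schema

    tool_schema = {
        "type": "object",
        "strict": True,
        "additionalProperties": False,
        "properties": {"name": {"type": "string"}},
    }
    print(strip_keys(tool_schema))
    # {'type': 'object', 'properties': {'name': {'type': 'string'}}}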
tests/llm_translation/test_vertex.py  (new file, 83 lines)
@@ -0,0 +1,83 @@
+import json
+import os
+import sys
+import traceback
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import io
+from unittest.mock import AsyncMock, MagicMock, patch
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+
+
+def test_completion_pydantic_obj_2():
+    from pydantic import BaseModel
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    litellm.set_verbose = True
+
+    class CalendarEvent(BaseModel):
+        name: str
+        date: str
+        participants: list[str]
+
+    class EventsList(BaseModel):
+        events: list[CalendarEvent]
+
+    messages = [
+        {"role": "user", "content": "List important events from the 20th century."}
+    ]
+    expected_request_body = {
+        "contents": [
+            {
+                "role": "user",
+                "parts": [{"text": "List important events from the 20th century."}],
+            }
+        ],
+        "generationConfig": {
+            "response_mime_type": "application/json",
+            "response_schema": {
+                "properties": {
+                    "events": {
+                        "items": {
+                            "properties": {
+                                "name": {"type": "string"},
+                                "date": {"type": "string"},
+                                "participants": {
+                                    "items": {"type": "string"},
+                                    "type": "array",
+                                },
+                            },
+                            "type": "object",
+                        },
+                        "type": "array",
+                    }
+                },
+                "type": "object",
+            },
+        },
+    }
+    client = HTTPHandler()
+    with patch.object(client, "post", new=MagicMock()) as mock_post:
+        mock_post.return_value = expected_request_body
+        try:
+            litellm.completion(
+                model="gemini/gemini-1.5-pro",
+                messages=messages,
+                response_format=EventsList,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        mock_post.assert_called_once()
+
+        print(mock_post.call_args.kwargs)
+
+        assert mock_post.call_args.kwargs["json"] == expected_request_body
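The expected_request_body in this new test is essentially the Pydantic model's JSON schema after litellm's translation layer rewrites it for Gemini's response_schema (the asserted schema carries no $defs, $ref, or title keys). The raw starting point can be inspected with standard Pydantic v2 (model_json_schema is Pydantic's API, not litellm's):

    from pydantic import BaseModel

    class CalendarEvent(BaseModel):
        name: str
        date: str
        participants: list[str]

    class EventsList(BaseModel):
        events: list[CalendarEvent]

    # Emits "$defs", "$ref", and "title" keys that the response_schema
    # asserted in the test above does not carry.
    print(EventsList.model_json_schema())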
@@ -2209,3 +2209,28 @@ async def test_redis_proxy_batch_redis_get_cache():

     print(response._hidden_params)
     assert "cache_key" in response._hidden_params
+
+
+def test_logging_turn_off_message_logging_streaming():
+    litellm.turn_off_message_logging = True
+    mock_obj = Cache(type="local")
+    litellm.cache = mock_obj
+
+    with patch.object(mock_obj, "add_cache", new=MagicMock()) as mock_client:
+        print(f"mock_obj.add_cache: {mock_obj.add_cache}")
+
+        resp = litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "hi"}],
+            mock_response="hello",
+            stream=True,
+        )
+
+        for chunk in resp:
+            continue
+
+        time.sleep(1)
+
+        mock_client.assert_called_once()
+
+        assert mock_client.call_args.args[0].choices[0].message.content == "hello"
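The new test pins down the interaction the streaming-cache fix addresses: with message logging turned off, logging callbacks see redacted content, but the cache write must still receive the real response text. A minimal configuration repro (mock_response is litellm's built-in test hook, as used in the test above):

    import litellm
    from litellm.caching import Cache

    litellm.turn_off_message_logging = True  # redact messages in logging callbacks
    litellm.cache = Cache(type="local")      # the cache still needs the unredacted response

    resp = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello",
        stream=True,
    )
    print("".join(chunk.choices[0].delta.content or "" for chunk in resp))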
@@ -1711,31 +1711,6 @@ def test_completion_perplexity_api():
 # test_completion_perplexity_api()


-@pytest.mark.skip(
-    reason="too many requests. Hitting gemini rate limits. Convert to mock test."
-)
-def test_completion_pydantic_obj_2():
-    from pydantic import BaseModel
-
-    litellm.set_verbose = True
-
-    class CalendarEvent(BaseModel):
-        name: str
-        date: str
-        participants: list[str]
-
-    class EventsList(BaseModel):
-        events: list[CalendarEvent]
-
-    messages = [
-        {"role": "user", "content": "List important events from the 20th century."}
-    ]
-
-    response = litellm.completion(
-        model="gemini/gemini-1.5-pro", messages=messages, response_format=EventsList
-    )
-
-
 @pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api_2():
     try:
@@ -4573,12 +4548,7 @@ async def test_completion_ai21_chat():


 @pytest.mark.parametrize(
     "model",
-    [
-        "gpt-4o",
-        "azure/chatgpt-v-2",
-        "claude-3-sonnet-20240229",
-        "fireworks_ai/mixtral-8x7b-instruct",
-    ],
+    ["gpt-4o", "azure/chatgpt-v-2", "claude-3-sonnet-20240229"],
 )
 @pytest.mark.parametrize(
     "stream",
@@ -4594,5 +4564,7 @@ def test_completion_response_ratelimit_headers(model, stream):
     additional_headers = hidden_params.get("additional_headers", {})

     print(additional_headers)
+    for k, v in additional_headers.items():
+        assert v != "None" and v is not None
     assert "x-ratelimit-remaining-requests" in additional_headers
     assert "x-ratelimit-remaining-tokens" in additional_headers
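The per-value loop added above guards the bug class where a header value came back as the string "None". On the consumer side, the same data is read off the response's hidden params; a sketch (header availability varies by provider):

    import litellm

    resp = litellm.completion(
        model="gpt-4o",
        messages=[{"role": "user", "content": "ping"}],
    )
    headers = resp._hidden_params.get("additional_headers", {})
    # Present headers carry real values; absent ones are simply missing --
    # never the string "None".
    print(headers.get("x-ratelimit-remaining-requests"))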
@@ -2359,3 +2359,131 @@ def test_together_ai_embedding_completion_cost():
         custom_llm_provider="together_ai",
         call_type="embedding",
     )
+
+
+def test_completion_cost_params():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+    """
+    litellm.set_verbose = True
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_tokens=1000,
+        completion_tokens=1000,
+        custom_llm_provider="vertex_ai_beta",
+    )
+
+    resp2_prompt_cost, resp2_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp2_prompt_cost > 0
+
+    assert resp1_prompt_cost == resp2_prompt_cost
+    assert resp1_completion_cost == resp2_completion_cost
+
+    resp3_prompt_cost, resp3_completion_cost = cost_per_token(
+        model="vertex_ai/gemini-1.5-pro-002", prompt_tokens=1000, completion_tokens=1000
+    )
+
+    assert resp3_prompt_cost > 0
+
+    assert resp3_prompt_cost == resp1_prompt_cost
+    assert resp3_completion_cost == resp1_completion_cost
+
+
+def test_completion_cost_params_2():
+    """
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/6133
+    """
+    litellm.set_verbose = True
+
+    prompt_characters = 1000
+    completion_characters = 1000
+    resp1_prompt_cost, resp1_completion_cost = cost_per_token(
+        model="gemini-1.5-pro-002",
+        prompt_characters=prompt_characters,
+        completion_characters=completion_characters,
+        prompt_tokens=1000,
+        completion_tokens=1000,
+    )
+
+    print(resp1_prompt_cost, resp1_completion_cost)
+
+    model_info = litellm.get_model_info("gemini-1.5-pro-002")
+    input_cost_per_character = model_info["input_cost_per_character"]
+    output_cost_per_character = model_info["output_cost_per_character"]
+
+    assert resp1_prompt_cost == input_cost_per_character * prompt_characters
+    assert resp1_completion_cost == output_cost_per_character * completion_characters
+
+
+def test_completion_cost_params_gemini_3():
+    from litellm.utils import Choices, Message, ModelResponse, Usage
+
+    from litellm.litellm_core_utils.llm_cost_calc.google import cost_per_character
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    response = ModelResponse(
+        id="chatcmpl-61043504-4439-48be-9996-e29bdee24dc3",
+        choices=[
+            Choices(
+                finish_reason="stop",
+                index=0,
+                message=Message(
+                    content="Sí. \n",
+                    role="assistant",
+                    tool_calls=None,
+                    function_call=None,
+                ),
+            )
+        ],
+        created=1728529259,
+        model="gemini-1.5-flash",
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=Usage(
+            completion_tokens=2,
+            prompt_tokens=3771,
+            total_tokens=3773,
+            completion_tokens_details=None,
+            prompt_tokens_details=None,
+        ),
+        vertex_ai_grounding_metadata=[],
+        vertex_ai_safety_results=[
+            [
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "probability": "NEGLIGIBLE",
+                },
+                {"category": "HARM_CATEGORY_HATE_SPEECH", "probability": "NEGLIGIBLE"},
+                {"category": "HARM_CATEGORY_HARASSMENT", "probability": "NEGLIGIBLE"},
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "probability": "NEGLIGIBLE",
+                },
+            ]
+        ],
+        vertex_ai_citation_metadata=[],
+    )
+
+    pc, cc = cost_per_character(
+        **{
+            "model": "gemini-1.5-flash",
+            "custom_llm_provider": "vertex_ai",
+            "prompt_tokens": 3771,
+            "completion_tokens": 2,
+            "prompt_characters": None,
+            "completion_characters": 3,
+        }
+    )
+
+    model_info = litellm.get_model_info("gemini-1.5-flash")
+
+    assert round(pc, 10) == round(3771 * model_info["input_cost_per_token"], 10)
+    assert round(cc, 10) == round(
+        3 * model_info["output_cost_per_character"],
+        10,
+    )
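Plugging the updated gemini-1.5-flash rates into test_completion_cost_params_gemini_3 makes the expected values concrete: prompt_characters=None forces the prompt side onto per-token pricing, while the completion side stays per-character:

    input_cost_per_token = 0.000000004688     # gemini-1.5-flash, from the JSON hunks above
    output_cost_per_character = 0.00000001875

    pc = 3771 * input_cost_per_token          # ~1.77e-05 (prompt, token-priced)
    cc = 3 * output_cost_per_character        # = 5.625e-08 (completion, character-priced)
    print(pc, cc)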
@@ -1414,6 +1414,7 @@ def test_logging_standard_payload_llm_headers(stream):
     with patch.object(
         customHandler, "log_success_event", new=MagicMock()
     ) as mock_client:
+
         resp = litellm.completion(
             model="gpt-3.5-turbo",
             messages=[{"role": "user", "content": "Hey, how's it going?"}],
@@ -68,3 +68,9 @@ def test_get_model_info_finetuned_models():
     info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id")
     print("info", info)
     assert info["input_cost_per_token"] == 0.000003
+
+
+def test_get_model_info_gemini_pro():
+    info = litellm.get_model_info("gemini-1.5-pro-002")
+    print("info", info)
+    assert info["key"] == "gemini-1.5-pro-002"
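The new assertion also documents the `key` field on get_model_info's return value, which reports the model_cost entry that the (now fall-through) lookup actually resolved to:

    import litellm

    info = litellm.get_model_info("gemini-1.5-pro-002")
    # "key" shows which model_cost entry matched after the prefix/stripped-name
    # fallbacks; the cost fields reflect the updated pricing above.
    print(info["key"], info["litellm_provider"], info["input_cost_per_character"])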