mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Merge branch 'main' into litellm_gemini_stream_tool_calling
This commit is contained in:
commit
c643be0c0c
70 changed files with 1844 additions and 984 deletions
|
@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
|
|||
from litellm.litellm_core_utils.redact_messages import (
|
||||
redact_message_input_output_from_logging,
|
||||
)
|
||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
|
||||
from litellm.types.utils import (
|
||||
CallTypes,
|
||||
EmbeddingResponse,
|
||||
|
@ -517,33 +519,36 @@ class Logging:
|
|||
self.model_call_details["cache_hit"] = cache_hit
|
||||
## if model in model cost map - log the response cost
|
||||
## else set cost to None
|
||||
verbose_logger.debug(f"Model={self.model};")
|
||||
if (
|
||||
result is not None
|
||||
and (
|
||||
result is not None and self.stream is not True
|
||||
): # handle streaming separately
|
||||
if (
|
||||
isinstance(result, ModelResponse)
|
||||
or isinstance(result, EmbeddingResponse)
|
||||
or isinstance(result, ImageResponse)
|
||||
or isinstance(result, TranscriptionResponse)
|
||||
or isinstance(result, TextCompletionResponse)
|
||||
)
|
||||
and self.stream != True
|
||||
): # handle streaming separately
|
||||
self.model_call_details["response_cost"] = (
|
||||
litellm.response_cost_calculator(
|
||||
response_object=result,
|
||||
model=self.model,
|
||||
cache_hit=self.model_call_details.get("cache_hit", False),
|
||||
custom_llm_provider=self.model_call_details.get(
|
||||
"custom_llm_provider", None
|
||||
),
|
||||
base_model=_get_base_model_from_metadata(
|
||||
model_call_details=self.model_call_details
|
||||
),
|
||||
call_type=self.call_type,
|
||||
optional_params=self.optional_params,
|
||||
or isinstance(result, HttpxBinaryResponseContent) # tts
|
||||
):
|
||||
custom_pricing = use_custom_pricing_for_model(
|
||||
litellm_params=self.litellm_params
|
||||
)
|
||||
self.model_call_details["response_cost"] = (
|
||||
litellm.response_cost_calculator(
|
||||
response_object=result,
|
||||
model=self.model,
|
||||
cache_hit=self.model_call_details.get("cache_hit", False),
|
||||
custom_llm_provider=self.model_call_details.get(
|
||||
"custom_llm_provider", None
|
||||
),
|
||||
base_model=_get_base_model_from_metadata(
|
||||
model_call_details=self.model_call_details
|
||||
),
|
||||
call_type=self.call_type,
|
||||
optional_params=self.optional_params,
|
||||
custom_pricing=custom_pricing,
|
||||
)
|
||||
)
|
||||
)
|
||||
else: # streaming chunks + image gen.
|
||||
self.model_call_details["response_cost"] = None
|
||||
|
||||
|
@ -1798,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
|
|||
|
||||
try:
|
||||
for callback in callback_list:
|
||||
print_verbose(f"init callback list: {callback}")
|
||||
if callback == "sentry":
|
||||
try:
|
||||
import sentry_sdk
|
||||
|
@ -2016,3 +2020,17 @@ def get_custom_logger_compatible_class(
|
|||
if isinstance(callback, _PROXY_DynamicRateLimitHandler):
|
||||
return callback # type: ignore
|
||||
return None
|
||||
|
||||
|
||||
def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
    """
    Return True when the request's model_info carries a custom pricing param.

    Reads `litellm_params["metadata"]["model_info"]` and checks whether any of
    its keys is listed in SPECIAL_MODEL_INFO_PARAMS.

    Returns False when `litellm_params`, its "metadata" entry, or the
    "model_info" entry is None / missing / empty.
    """
    if litellm_params is None:
        return False

    metadata: Optional[dict] = litellm_params.get("metadata", {})
    if metadata is None:
        return False

    model_info: Optional[dict] = metadata.get("model_info", {})
    if not model_info:
        return False

    # Only the keys matter here, so iterate the dict directly instead of .items().
    return any(key in SPECIAL_MODEL_INFO_PARAMS for key in model_info)
|
||||
|
|
85
litellm/litellm_core_utils/llm_cost_calc/utils.py
Normal file
85
litellm/litellm_core_utils/llm_cost_calc/utils.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
# What is this?
|
||||
## Helper utilities for cost_per_token()
|
||||
|
||||
import traceback
|
||||
from typing import List, Literal, Optional, Tuple
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
|
||||
|
||||
def _character_cost_or_none(
    model: str,
    model_info,
    pricing_key: str,
    characters: float,
    custom_cost: Optional[float],
) -> Optional[float]:
    """
    Compute `characters * per-character rate`, or return None on any failure.

    Uses `custom_cost` as the rate when given, else `model_info[pricing_key]`.
    Any exception (including missing pricing) is logged and turned into None.
    """
    try:
        if custom_cost is None:
            # Explicit raise instead of `assert`, so the check survives `python -O`.
            if pricing_key not in model_info or model_info[pricing_key] is None:
                raise ValueError(
                    "model info for model={} does not have '{}'-pricing\nmodel_info={}".format(
                        model, pricing_key, model_info
                    )
                )
            custom_cost = model_info[pricing_key]

        return characters * custom_cost
    except Exception as e:
        verbose_logger.error(
            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
                str(e), traceback.format_exc()
            )
        )
        return None


def _generic_cost_per_character(
    model: str,
    custom_llm_provider: str,
    prompt_characters: float,
    completion_characters: float,
    custom_prompt_cost: Optional[float],
    custom_completion_cost: Optional[float],
) -> Tuple[Optional[float], Optional[float]]:
    """
    Generic function to help calculate cost per character.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, "vertex_ai-*"
        - prompt_characters: float, the number of input characters
        - completion_characters: float, the number of output characters
        - custom_prompt_cost: Optional[float], per-character input rate used
            instead of model_info's 'input_cost_per_character' when not None
        - custom_completion_cost: Optional[float], per-character output rate used
            instead of model_info's 'output_cost_per_character' when not None

    Returns:
        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
        - each element is None if that cost could not be calculated (e.g. the
          per-character pricing is missing from model_info); the failure is
          logged via verbose_logger.error rather than raised.

    Raises:
        Whatever `litellm.get_model_info` raises - the lookup is performed
        outside of the error handling, so its exceptions propagate to the caller.
    """
    ## GET MODEL INFO
    model_info = litellm.get_model_info(
        model=model, custom_llm_provider=custom_llm_provider
    )

    ## CALCULATE INPUT COST
    prompt_cost = _character_cost_or_none(
        model=model,
        model_info=model_info,
        pricing_key="input_cost_per_character",
        characters=prompt_characters,
        custom_cost=custom_prompt_cost,
    )

    ## CALCULATE OUTPUT COST
    completion_cost = _character_cost_or_none(
        model=model,
        model_info=model_info,
        pricing_key="output_cost_per_character",
        characters=completion_characters,
        custom_cost=custom_completion_cost,
    )

    return prompt_cost, completion_cost
|
Loading…
Add table
Add a link
Reference in a new issue