Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
(litellm SDK perf improvement) - use verbose_logger.debug and _cached_get_model_info_helper in _response_cost_calculator (#7720)

* define _cached_get_model_info_helper
* use _cached_get_model_info_helper
This commit is contained in:
parent 15b52039d2
commit 6518bc70a0

2 changed files with 29 additions and 9 deletions
First changed file:

```diff
@@ -60,8 +60,7 @@ from litellm.utils import (
     ModelResponse,
     TextCompletionResponse,
     TranscriptionResponse,
-    _get_model_info_helper,
-    print_verbose,
+    _cached_get_model_info_helper,
     token_counter,
 )
```
```diff
@@ -279,7 +278,7 @@ def cost_per_token(  # noqa: PLR0915
     elif custom_llm_provider == "deepseek":
         return deepseek_cost_per_token(model=model, usage=usage_block)
     else:
-        model_info = _get_model_info_helper(
+        model_info = _cached_get_model_info_helper(
             model=model, custom_llm_provider=custom_llm_provider
         )
```
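Worth noting about this swap: `functools.lru_cache` keys on the exact argument pattern, so positional and keyword calls create separate cache entries. The diff calls the cached helper with keyword arguments, matching how the wrapper forwards them. A small illustration of this generic `lru_cache` behavior (the `lookup` function is a stand-in, not litellm code):

```python
from functools import lru_cache


@lru_cache(maxsize=16)
def lookup(model: str, provider: str) -> tuple:
    # Stand-in for an expensive model-info lookup.
    return (model, provider)


lookup("gpt-4o", "openai")                   # positional call -> one cache entry
lookup(model="gpt-4o", provider="openai")    # keyword call -> a second entry
print(lookup.cache_info().currsize)          # 2 -- argument patterns cache separately
```

Keeping call sites consistent (all keyword, as in the diff) avoids splitting the cache across argument patterns.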
```diff
@@ -292,8 +291,11 @@ def cost_per_token(  # noqa: PLR0915
         model_info.get("input_cost_per_second", None) is not None
         and response_time_ms is not None
     ):
-        print_verbose(
-            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
+        verbose_logger.debug(
+            "For model=%s - input_cost_per_second: %s; response time: %s",
+            model,
+            model_info.get("input_cost_per_second", None),
+            response_time_ms,
         )
         ## COST PER SECOND ##
         prompt_tokens_cost_usd_dollar = (
```
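The `print_verbose` → `verbose_logger.debug` change is not just a logger swap: the old f-string was formatted on every call, while `Logger.debug` with %-style arguments checks the log level first and only formats the message when a handler will actually emit the record. A minimal sketch of the difference (standard `logging`, not the litellm logger):

```python
import logging

logger = logging.getLogger("demo")
logger.setLevel(logging.INFO)  # DEBUG is disabled

model = "gpt-4o"
cost = 0.0000025

# Eager: the f-string is always built, even though nothing is logged.
logger.debug(f"For model={model} - input_cost_per_second: {cost}")

# Lazy: %-style args are only formatted if the DEBUG level is enabled,
# so this call is close to a no-op when debug logging is off.
logger.debug("For model=%s - input_cost_per_second: %s", model, cost)
```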
```diff
@@ -308,16 +310,22 @@ def cost_per_token(  # noqa: PLR0915
         model_info.get("output_cost_per_second", None) is not None
         and response_time_ms is not None
     ):
-        print_verbose(
-            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+        verbose_logger.debug(
+            "For model=%s - output_cost_per_second: %s; response time: %s",
+            model,
+            model_info.get("output_cost_per_second", None),
+            response_time_ms,
         )
         ## COST PER SECOND ##
         completion_tokens_cost_usd_dollar = (
             model_info["output_cost_per_second"] * response_time_ms / 1000  # type: ignore
         )
 
-    print_verbose(
-        f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
+    verbose_logger.debug(
+        "Returned custom cost for model=%s - prompt_tokens_cost_usd_dollar: %s, completion_tokens_cost_usd_dollar: %s",
+        model,
+        prompt_tokens_cost_usd_dollar,
+        completion_tokens_cost_usd_dollar,
     )
     return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
 
```
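A rough way to see the saving at high request rates, assuming debug logging is disabled in production (a hypothetical micro-benchmark, not from the PR):

```python
import logging
import timeit

logger = logging.getLogger("bench")
logger.setLevel(logging.INFO)  # DEBUG records are filtered out

model, p_cost, c_cost = "gpt-4o", 0.0012, 0.0034

# Eager: builds the f-string 100k times even though nothing is emitted.
eager = timeit.timeit(
    lambda: logger.debug(
        f"Returned custom cost for model={model} - prompt: {p_cost}, completion: {c_cost}"
    ),
    number=100_000,
)
# Lazy: the level check short-circuits before any formatting happens.
lazy = timeit.timeit(
    lambda: logger.debug(
        "Returned custom cost for model=%s - prompt: %s, completion: %s",
        model, p_cost, c_cost,
    ),
    number=100_000,
)
print(f"eager f-string: {eager:.3f}s  lazy %-args: {lazy:.3f}s")
```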
Second changed file, adding the cached wrapper directly above `_get_model_info_helper`:

```diff
@@ -4013,6 +4013,18 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
     return None
 
 
+@lru_cache(maxsize=16)
+def _cached_get_model_info_helper(
+    model: str, custom_llm_provider: Optional[str]
+) -> ModelInfoBase:
+    """
+    _get_model_info_helper wrapped with lru_cache
+
+    Speed Optimization to hit high RPS
+    """
+    return _get_model_info_helper(model=model, custom_llm_provider=custom_llm_provider)
+
+
 def _get_model_info_helper(  # noqa: PLR0915
     model: str, custom_llm_provider: Optional[str] = None
 ) -> ModelInfoBase:
```
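Usage-wise the wrapper is transparent: the first call for a given `(model, custom_llm_provider)` pair computes the result, repeats hit the LRU cache, and `cache_info()` exposes the hit rate. A sketch against a stand-in helper (the dict body below is illustrative, not the real `ModelInfoBase`):

```python
from functools import lru_cache
from typing import Optional


def _get_model_info_helper(model: str, custom_llm_provider: Optional[str] = None) -> dict:
    # Stand-in: imagine an expensive walk over the model cost map here.
    return {"key": model, "litellm_provider": custom_llm_provider}


@lru_cache(maxsize=16)
def _cached_get_model_info_helper(model: str, custom_llm_provider: Optional[str]) -> dict:
    return _get_model_info_helper(model=model, custom_llm_provider=custom_llm_provider)


_cached_get_model_info_helper("gpt-4o", "openai")  # miss: computed and stored
_cached_get_model_info_helper("gpt-4o", "openai")  # hit: served from the cache
print(_cached_get_model_info_helper.cache_info())
# CacheInfo(hits=1, misses=1, maxsize=16, currsize=1)
```

The trade-off is staleness: an entry survives until evicted (LRU, 16 entries here) or until `cache_clear()` is called, so runtime changes to model pricing will not be reflected for already-cached pairs.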