(litellm SDK perf improvement) - use verbose_logger.debug and _cached_get_model_info_helper in _response_cost_calculator (#7720)

* define _cached_get_model_info_helper

* use _cached_get_model_info_helper
This commit is contained in:
Ishaan Jaff 2025-01-12 15:27:54 -08:00 committed by GitHub
parent 15b52039d2
commit 6518bc70a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 9 deletions

View file

@ -4013,6 +4013,18 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
return None
@lru_cache(maxsize=16)
def _cached_get_model_info_helper(
    model: str, custom_llm_provider: Optional[str]
) -> ModelInfoBase:
    """
    _get_model_info_helper wrapped with lru_cache

    Speed Optimization to hit high RPS
    """
    # Delegate to the uncached helper; lru_cache memoizes by
    # (model, custom_llm_provider) so repeated lookups skip the work.
    info = _get_model_info_helper(
        model=model,
        custom_llm_provider=custom_llm_provider,
    )
    return info
def _get_model_info_helper( # noqa: PLR0915
model: str, custom_llm_provider: Optional[str] = None
) -> ModelInfoBase: