mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
(litellm sdk speedup router) - adds a helper _cached_get_model_group_info
to use when trying to get deployment tpm/rpm limits (#7719)
* fix _cached_get_model_group_info
* fixes get_remaining_model_group_usage
* test_cached_get_model_group_info
This commit is contained in:
parent
bb4b9b8fbf
commit
b4a99afee3
2 changed files with 36 additions and 4 deletions
|
@ -19,6 +19,7 @@ import time
|
|||
import traceback
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
|
@ -4696,11 +4697,19 @@ class Router:
|
|||
rpm_usage += t
|
||||
return tpm_usage, rpm_usage
|
||||
|
||||
@lru_cache(maxsize=64)
|
||||
def _cached_get_model_group_info(
|
||||
self, model_group: str
|
||||
) -> Optional[ModelGroupInfo]:
|
||||
"""
|
||||
Cached version of get_model_group_info, uses @lru_cache wrapper
|
||||
|
||||
This is a speed optimization, since set_response_headers makes a call to get_model_group_info on every request
|
||||
"""
|
||||
return self.get_model_group_info(model_group)
|
||||
|
||||
async def get_remaining_model_group_usage(self, model_group: str) -> Dict[str, int]:
|
||||
|
||||
current_tpm, current_rpm = await self.get_model_group_usage(model_group)
|
||||
|
||||
model_group_info = self.get_model_group_info(model_group)
|
||||
model_group_info = self._cached_get_model_group_info(model_group)
|
||||
|
||||
if model_group_info is not None and model_group_info.tpm is not None:
|
||||
tpm_limit = model_group_info.tpm
|
||||
|
@ -4712,6 +4721,11 @@ class Router:
|
|||
else:
|
||||
rpm_limit = None
|
||||
|
||||
if tpm_limit is None and rpm_limit is None:
|
||||
return {}
|
||||
|
||||
current_tpm, current_rpm = await self.get_model_group_usage(model_group)
|
||||
|
||||
returned_dict = {}
|
||||
if tpm_limit is not None:
|
||||
returned_dict["x-ratelimit-remaining-tokens"] = tpm_limit - (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue