[Fix proxy perf] Use correct cache key when reading from redis cache (#5928)

* fix parallel request limiter to use the correct user id

* async def get_user_object(
fix

* use safe get_internal_user_object

* fix: store internal users in redis correctly
This commit is contained in:
Ishaan Jaff 2024-09-26 18:13:35 -07:00 committed by GitHub
parent 8b6eec1951
commit 58171f35ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 51 additions and 7 deletions

View file

@ -322,9 +322,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# check if REQUEST ALLOWED for user_id
user_id = user_api_key_dict.user_id
if user_id is not None:
_user_id_rate_limits = await self.internal_usage_cache.async_get_cache(
key=user_id,
litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
_user_id_rate_limits = await self.get_internal_user_object(
user_id=user_id,
user_api_key_dict=user_api_key_dict,
)
# get user tpm/rpm limits
if _user_id_rate_limits is not None and isinstance(
@ -741,3 +741,39 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
str(e)
)
)
async def get_internal_user_object(
    self,
    user_id: str,
    user_api_key_dict: UserAPIKeyAuth,
) -> Optional[dict]:
    """
    Helper to get the 'Internal User Object' as a plain dict.

    It uses the `get_user_object` function from `litellm.proxy.auth.auth_checks`,
    passing `self.internal_usage_cache.dual_cache` as the user cache.

    We need this because the UserAPIKeyAuth object does not contain the rpm/tpm
    limits for a User AND there could be a perf impact by additionally reading
    the UserTable.

    Args:
        user_id: ID of the internal user to look up.
        user_api_key_dict: Auth object of the current request; only its
            `parent_otel_span` is read here.

    Returns:
        The `model_dump()` of the user object, or None when no user object is
        returned or when the lookup raises (the error is logged, not re-raised).
    """
    # Imported at call time rather than module level.
    # NOTE(review): presumably to avoid a circular import with proxy_server - confirm.
    from litellm._logging import verbose_proxy_logger
    from litellm.proxy.auth.auth_checks import get_user_object
    from litellm.proxy.proxy_server import prisma_client

    try:
        _user_id_rate_limits = await get_user_object(
            user_id=user_id,
            prisma_client=prisma_client,
            user_api_key_cache=self.internal_usage_cache.dual_cache,
            user_id_upsert=False,
            parent_otel_span=user_api_key_dict.parent_otel_span,
            proxy_logging_obj=None,
        )
        if _user_id_rate_limits is None:
            return None
        return _user_id_rate_limits.model_dump()
    except Exception as e:
        # Best-effort lookup: a failure here must not fail the request, so log
        # and fall through to "no limits known". The "%s" placeholder is
        # required - passing an argument without one makes the logging module
        # raise a formatting error and drop the message.
        verbose_proxy_logger.exception(
            "Parallel Request Limiter: Error getting user object - %s", str(e)
        )
        return None