diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
index 83c676518..f9be71c35 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@@ -1,4 +1,5 @@
 import re
+import sys
 
 from fastapi import Request
 
@@ -88,7 +89,11 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
         request (Request): The incoming request.
 
     Returns:
-        bool: True if the request size is within the limit, False otherwise.
+        bool: True if the request size is within the limit
+
+    Raises:
+        ProxyException: If the request size is too large
+
     """
     from litellm.proxy.proxy_server import general_settings, premium_user
 
@@ -138,6 +143,46 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
     return True
 
 
+async def check_response_size_is_safe(response: Any) -> bool:
+    """
+    Enterprise Only:
+        - Checks if the response size is within the limit
+
+    Args:
+        response (Any): The response to check.
+
+    Returns:
+        bool: True if the response size is within the limit
+
+    Raises:
+        ProxyException: If the response size is too large
+
+    """
+
+    from litellm.proxy.proxy_server import general_settings, premium_user
+
+    max_response_size_mb = general_settings.get("max_response_size_mb", None)
+    if max_response_size_mb is not None:
+        # Check if premium user
+        if premium_user is not True:
+            verbose_proxy_logger.warning(
+                f"using max_response_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
+            )
+            return True
+
+        response_size_mb = bytes_to_mb(bytes_value=sys.getsizeof(response))
+        verbose_proxy_logger.debug(f"response size in MB={response_size_mb}")
+        if response_size_mb > max_response_size_mb:
+            raise ProxyException(
+                message=f"Response size is too large. Response size is {response_size_mb} MB. Max size is {max_response_size_mb} MB",
+                type=ProxyErrorTypes.bad_request_error.value,
+                code=400,
+                param="content-length",
+            )
+
+    return True
+
+
 def bytes_to_mb(bytes_value: int):
     """
     Helper to convert bytes to MB
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 82127046d..7a8bd9535 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -31,7 +31,7 @@ general_settings:
   # Security controls
   max_request_size_mb: 100
   # google cloud run maximum repsonses size is 32MB
-  max_response_size_mb: 100
+  max_response_size_mb: 10
 
 litellm_settings:
   callbacks: ["otel"]
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 3a80e1960..3f7edc3bc 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -124,6 +124,7 @@ from litellm.proxy.auth.auth_checks import (
     get_user_object,
     log_to_opentelemetry,
 )
+from litellm.proxy.auth.auth_utils import check_response_size_is_safe
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.auth.litellm_license import LicenseCheck
 from litellm.proxy.auth.model_checks import (
@@ -3000,6 +3001,7 @@ async def chat_completion(
                     **additional_headers,
                 )
             )
+            await check_response_size_is_safe(response=response)
 
             return response
     except RejectedRequestError as e:
@@ -3241,7 +3243,7 @@ async def completion(
                     response_cost=response_cost,
                 )
             )
-
+            await check_response_size_is_safe(response=response)
             return response
     except RejectedRequestError as e:
         _data = e.request_data
@@ -3491,6 +3493,7 @@ async def embeddings(
                 call_id=litellm_call_id,
             )
         )
+        await check_response_size_is_safe(response=response)
 
         return response
     except Exception as e: