diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
index 83c676518..f9be71c35 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@@ -1,4 +1,5 @@
 import re
+import sys
 
 from fastapi import Request
 
@@ -88,7 +89,11 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
         request (Request): The incoming request.
 
     Returns:
-        bool: True if the request size is within the limit, False otherwise.
+        bool: True if the request size is within the limit
+
+    Raises:
+        ProxyException: If the request size is too large
+
     """
     from litellm.proxy.proxy_server import general_settings, premium_user
 
@@ -138,6 +143,46 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
     return True
 
 
+async def check_response_size_is_safe(response: Any) -> bool:
+    """
+    Enterprise Only:
+        - Checks if the response size is within the limit
+
+    Args:
+        response (Any): The response to check.
+
+    Returns:
+        bool: True if the response size is within the limit
+
+    Raises:
+        ProxyException: If the response size is too large
+
+    """
+
+    from litellm.proxy.proxy_server import general_settings, premium_user
+
+    max_response_size_mb = general_settings.get("max_response_size_mb", None)
+    if max_response_size_mb is not None:
+        # Check if premium user
+        if premium_user is not True:
+            verbose_proxy_logger.warning(
+                f"using max_response_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
+            )
+            return True
+
+        response_size_mb = bytes_to_mb(bytes_value=sys.getsizeof(response))
+        verbose_proxy_logger.debug(f"response size in MB={response_size_mb}")
+        if response_size_mb > max_response_size_mb:
+            raise ProxyException(
+                message=f"Response size is too large. Response size is {response_size_mb} MB. Max size is {max_response_size_mb} MB",
+                type=ProxyErrorTypes.bad_request_error.value,
+                code=400,
+                param="content-length",
+            )
+
+    return True
+
+
 def bytes_to_mb(bytes_value: int):
     """
     Helper to convert bytes to MB
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 82127046d..7a8bd9535 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -31,7 +31,7 @@ general_settings:
   # Security controls
   max_request_size_mb: 100
   # google cloud run maximum repsonses size is 32MB
-  max_response_size_mb: 100
+  max_response_size_mb: 10
 
 litellm_settings:
   callbacks: ["otel"]
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 3a80e1960..3f7edc3bc 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -124,6 +124,7 @@ from litellm.proxy.auth.auth_checks import (
     get_user_object,
     log_to_opentelemetry,
 )
+from litellm.proxy.auth.auth_utils import check_response_size_is_safe
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.auth.litellm_license import LicenseCheck
 from litellm.proxy.auth.model_checks import (
@@ -3000,6 +3001,7 @@ async def chat_completion(
                     **additional_headers,
                 )
             )
+            await check_response_size_is_safe(response=response)
 
             return response
     except RejectedRequestError as e:
@@ -3241,7 +3243,7 @@ async def completion(
                     response_cost=response_cost,
                 )
             )
-
+            await check_response_size_is_safe(response=response)
             return response
     except RejectedRequestError as e:
         _data = e.request_data
@@ -3491,6 +3493,7 @@ async def embeddings(
                 call_id=litellm_call_id,
             )
         )
+        await check_response_size_is_safe(response=response)
 
         return response
     except Exception as e: