Merge pull request #4926 from BerriAI/litellm_check_max_request_size

Proxy Enterprise - security - check max request size

Commit 003108a074: 4 changed files with 139 additions and 13 deletions

@@ -21,7 +21,7 @@ Features:

- ✅ IP address-based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Set Max Request Size / File Size on Requests](#set-max-request--response-size-on-litellm-proxy)
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Enterprise Spend Tracking Features**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)

@@ -1288,3 +1288,52 @@ How it works?

**Note:** Setting an environment variable within a Python script using `os.environ` will not make that variable accessible via SSH sessions or any other new processes that are started independently of the Python script. Environment variables set this way only affect the current process and its child processes.
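
To make this concrete, here is a minimal sketch (a hypothetical standalone script, not part of this PR) showing that a variable set via `os.environ` is inherited by child processes but not by independently started sessions:

```python
import os
import subprocess

# Set a variable in the current Python process
os.environ["LITELLM_DEMO_FLAG"] = "1"

# Child processes inherit this process's environment, so the flag is visible
child = subprocess.run(
    ["python", "-c", "import os; print(os.environ.get('LITELLM_DEMO_FLAG'))"],
    capture_output=True,
    text=True,
)
print(child.stdout.strip())  # -> "1"

# A separate SSH session (or any process started outside this script)
# never inherits this environment, so the flag will not be set there.
```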

## Set Max Request / Response Size on LiteLLM Proxy

Use this if you want to set a maximum request / response size for your proxy server. If a request exceeds the configured size, it gets rejected and a Slack alert is triggered.

#### Usage

**Step 1.** Set `max_request_size_mb` and `max_response_size_mb`

For this example we set a very low limit on `max_request_size_mb` and expect the request to get rejected.

:::info
In production we recommend setting `max_request_size_mb` / `max_response_size_mb` to around `32 MB`.
:::

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  master_key: sk-1234

  # Security controls
  max_request_size_mb: 0.000000001 # 👈 Key Change - Max Request Size in MB. Set this very low for testing
  max_response_size_mb: 100 # 👈 Key Change - Max Response Size in MB
```
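
The proxy can then be started with this config using the standard LiteLLM CLI (the path below is a placeholder):

```shell
litellm --config /path/to/config.yaml
```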

**Step 2.** Test it with a `/chat/completions` request

```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```

**Expected Response from request**

We expect this to fail since the request size is over `max_request_size_mb`:

```shell
{"error":{"message":"Request size is too large. Request size is 0.0001125335693359375 MB. Max size is 1e-09 MB","type":"bad_request_error","param":"content-length","code":400}}
```
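
As a sanity check on the numbers in that error: the proxy converts bytes to MB by dividing by `1024 * 1024`, so the rejected body was exactly 118 bytes:

```python
# 118-byte request body, converted the same way the proxy does it
body_bytes = 118
print(body_bytes / (1024 * 1024))  # 0.0001125335693359375 MB, far above the 1e-09 MB limit
```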

@@ -1,5 +1,7 @@

```python
import re

from fastapi import Request

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import *
```

@@ -75,3 +77,69 @@ def is_llm_api_route(route: str) -> bool:

```python
                return True

    return False


async def check_if_request_size_is_safe(request: Request) -> bool:
    """
    Enterprise Only:
        - Checks if the request size is within the limit

    Args:
        request (Request): The incoming request.

    Returns:
        bool: True if the request size is within the limit, False otherwise.
    """
    from litellm.proxy.proxy_server import general_settings, premium_user

    max_request_size_mb = general_settings.get("max_request_size_mb", None)
    if max_request_size_mb is not None:
        # Check if premium user
        if premium_user is not True:
            verbose_proxy_logger.warning(
                f"using max_request_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
            )
            return True

        # Get the request body
        content_length = request.headers.get("content-length")

        if content_length:
            header_size = int(content_length)
            header_size_mb = bytes_to_mb(bytes_value=header_size)
            verbose_proxy_logger.debug(
                f"content_length request size in MB={header_size_mb}"
            )

            if header_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {header_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )
        else:
            # If Content-Length is not available, read the body
            body = await request.body()
            body_size = len(body)
            request_size_mb = bytes_to_mb(bytes_value=body_size)

            verbose_proxy_logger.debug(
                f"request body request size in MB={request_size_mb}"
            )
            if request_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {request_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )

    return True


def bytes_to_mb(bytes_value: int):
    """
    Helper to convert bytes to MB
    """
    return bytes_value / (1024 * 1024)
```
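
For context, here is a minimal sketch of how this check could be invoked from a FastAPI handler (hypothetical wiring for illustration only; in this PR the call actually happens inside `user_api_key_auth`, shown below):

```python
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    # Raises ProxyException (HTTP 400) when the body exceeds
    # general_settings["max_request_size_mb"]; returns True otherwise.
    await check_if_request_size_is_safe(request=request)
    return {"status": "request size ok"}
```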

@@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import (

```python
    log_to_opentelemetry,
)
from litellm.proxy.auth.auth_utils import (
    check_if_request_size_is_safe,
    is_llm_api_route,
    route_in_additonal_public_routes,
)
```

@@ -116,6 +117,21 @@ async def user_api_key_auth(

```python
    try:
        route: str = request.url.path

        ### LiteLLM Enterprise Security Checks
        # Check 1. Check if request size is under max_request_size_mb
        # Check 2. FILTER IP ADDRESS
        await check_if_request_size_is_safe(request=request)

        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )

        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        pass_through_endpoints: Optional[List[dict]] = general_settings.get(
            "pass_through_endpoints", None
        )
```
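
Both checks read from `general_settings`; a sketch of what that configuration might look like (the IP addresses and sizes below are placeholder values):

```yaml
general_settings:
  master_key: sk-1234
  allowed_ips: ["127.0.0.1", "192.168.1.10"] # requests from any other IP get HTTP 403
  max_request_size_mb: 32                    # larger request bodies get HTTP 400
```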

@@ -170,18 +186,6 @@ async def user_api_key_auth(

````python
        ```
        """

        ### FILTER IP ADDRESS ###

        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )

        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        if (
            route in LiteLLMRoutes.public_routes.value
            or route_in_additonal_public_routes(current_route=route)
````

@@ -28,5 +28,10 @@ model_list:

```yaml
general_settings:
  master_key: sk-1234

  # Security controls
  max_request_size_mb: 100
  # google cloud run maximum response size is 32MB
  max_response_size_mb: 100

litellm_settings:
  callbacks: ["otel"]
```