Merge pull request #4926 from BerriAI/litellm_check_max_request_size

Proxy Enterprise - security - check max request size
Ishaan Jaff 2024-07-27 17:02:12 -07:00 committed by GitHub
commit 003108a074
4 changed files with 139 additions and 13 deletions


@@ -21,7 +21,7 @@ Features:
- ✅ IP address-based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ [Set Max Request Size / File Size on Requests](#set-max-request--response-size-on-litellm-proxy)
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Enterprise Spend Tracking Features**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
@@ -1288,3 +1288,52 @@ How it works?
**Note:** Setting an environment variable within a Python script using os.environ will not make that variable accessible via SSH sessions or any other new processes that are started independently of the Python script. Environment variables set this way only affect the current process and its child processes.

## Set Max Request / Response Size on LiteLLM Proxy
Use this if you want to set a maximum request / response size for your proxy server. If a request's size exceeds the limit, it is rejected and a Slack alert is triggered.

#### Usage
**Step 1.** Set `max_request_size_mb` and `max_response_size_mb`

For this example we set a very low limit on `max_request_size_mb` and expect requests to be rejected
:::info
In production we recommend setting `max_request_size_mb` / `max_response_size_mb` to around `32 MB`
:::
```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  master_key: sk-1234
  # Security controls
  max_request_size_mb: 0.000000001 # 👈 Key Change - Max Request Size in MB. Set this very low for testing
  max_response_size_mb: 100 # 👈 Key Change - Max Response Size in MB
```
**Step 2.** Test it with a `/chat/completions` request
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```
**Expected Response from request**

We expect this to fail since the request size is over `max_request_size_mb`
```json
{"error":{"message":"Request size is too large. Request size is 0.0001125335693359375 MB. Max size is 1e-09 MB","type":"bad_request_error","param":"content-length","code":400}}
```
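The reported size is just the request's `Content-Length` divided by `1024 * 1024` (here, exactly 118 bytes). A minimal sketch of that arithmetic (ours, not from the docs), with the byte count taken from the error above:

```python
# The proxy converts bytes to MB as bytes / (1024 * 1024).
content_length = 118  # bytes; 118 / 1048576 == 0.0001125335693359375

size_mb = content_length / (1024 * 1024)
print(size_mb > 0.000000001)  # True -> request exceeds the test limit and is rejected
```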


@@ -1,5 +1,7 @@
import re

from fastapi import Request

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import *
@@ -75,3 +77,69 @@ def is_llm_api_route(route: str) -> bool:
            return True
    return False


async def check_if_request_size_is_safe(request: Request) -> bool:
    """
    Enterprise Only:
        - Checks if the request size is within the limit

    Args:
        request (Request): The incoming request.

    Returns:
        bool: True if the request size is within the limit, False otherwise.
    """
    from litellm.proxy.proxy_server import general_settings, premium_user

    max_request_size_mb = general_settings.get("max_request_size_mb", None)
    if max_request_size_mb is not None:
        # Check if premium user
        if premium_user is not True:
            verbose_proxy_logger.warning(
                f"using max_request_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
            )
            return True

        # Get the request body
        content_length = request.headers.get("content-length")

        if content_length:
            header_size = int(content_length)
            header_size_mb = bytes_to_mb(bytes_value=header_size)
            verbose_proxy_logger.debug(
                f"content_length request size in MB={header_size_mb}"
            )

            if header_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {header_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )
        else:
            # If Content-Length is not available, read the body
            body = await request.body()
            body_size = len(body)
            request_size_mb = bytes_to_mb(bytes_value=body_size)
            verbose_proxy_logger.debug(
                f"request body request size in MB={request_size_mb}"
            )

            if request_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {request_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )

    return True


def bytes_to_mb(bytes_value: int):
    """
    Helper to convert bytes to MB
    """
    return bytes_value / (1024 * 1024)
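A hedged usage sketch (ours, not part of the commit) of the Content-Length fast path, assuming a bare Starlette `Request` built from a raw ASGI scope:

```python
from starlette.requests import Request  # FastAPI's Request is this class
from litellm.proxy.auth.auth_utils import bytes_to_mb

# Hypothetical ASGI scope advertising a 1 MiB body via Content-Length.
scope = {
    "type": "http",
    "method": "POST",
    "path": "/v1/chat/completions",
    "headers": [(b"content-length", b"1048576")],
}
request = Request(scope)

header_size_mb = bytes_to_mb(bytes_value=int(request.headers["content-length"]))
print(header_size_mb)  # 1.0 -> allowed at a 32 MB limit, rejected at 0.5 MB
```

Note that when `Content-Length` is present the body is never read; only when the header is missing does the check fall back to `await request.body()` to measure the payload.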


@@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import (
    log_to_opentelemetry,
)
from litellm.proxy.auth.auth_utils import (
    check_if_request_size_is_safe,
    is_llm_api_route,
    route_in_additonal_public_routes,
)
@@ -116,6 +117,21 @@ async def user_api_key_auth(
    try:
        route: str = request.url.path

        ### LiteLLM Enterprise Security Checks
        # Check 1. Check if request size is under max_request_size_mb
        # Check 2. FILTER IP ADDRESS
        await check_if_request_size_is_safe(request=request)

        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )

        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        pass_through_endpoints: Optional[List[dict]] = general_settings.get(
            "pass_through_endpoints", None
        )
@@ -170,18 +186,6 @@ async def user_api_key_auth(
        ```
        """

        ### FILTER IP ADDRESS ###
        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )
        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        if (
            route in LiteLLMRoutes.public_routes.value
            or route_in_additonal_public_routes(current_route=route)
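Because the request-size check and the IP filter now run at the very top of `user_api_key_auth` (the standalone IP-filter block was removed from its old position further down, per the second hunk), an oversized request is rejected before the API key is ever validated. A hedged client-side probe (ours, not from the commit), assuming the docs' proxy on `localhost:4000` with the tiny test limit configured:

```python
import httpx

# Hypothetical probe: even with an invalid key, an oversized request should
# come back 400 (request too large) rather than 401, since the size check
# runs before key validation.
response = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Authorization": "Bearer not-a-real-key"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(response.status_code)  # expect 400 when max_request_size_mb is exceeded
```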


@@ -28,5 +28,10 @@ model_list:
general_settings:
  master_key: sk-1234

  # Security controls
  max_request_size_mb: 100
  # google cloud run maximum response size is 32MB
  max_response_size_mb: 100

litellm_settings:
  callbacks: ["otel"]
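A small sketch (ours, not part of the commit) of reading those controls back out of the config, the same way the proxy consumes `general_settings` at runtime; the config filename is an assumption:

```python
import yaml  # PyYAML

# Hypothetical path to the config shown above
with open("proxy_config.yaml") as f:
    config = yaml.safe_load(f)

general_settings = config.get("general_settings", {})
print(general_settings.get("max_request_size_mb"))   # 100
print(general_settings.get("max_response_size_mb"))  # 100
```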