diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index 3607cb07fa..ceeb915018 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -21,7 +21,7 @@ Features:
     - ✅ IP address‑based access control lists
     - ✅ Track Request IP Address
    - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
-    - ✅ Set Max Request / File Size on Requests
+    - ✅ [Set Max Request Size / File Size on Requests](#set-max-request--response-size-on-litellm-proxy)
     - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
 - **Enterprise Spend Tracking Features**
     - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
@@ -1288,3 +1288,52 @@ How it works?
 
 **Note:** Setting an environment variable within a Python script using os.environ will not make that variable accessible via SSH sessions or any other new processes that are started independently of the Python script. Environment variables set this way only affect the current process and its child processes.
+
+## Set Max Request / Response Size on LiteLLM Proxy
+
+Use this to set a maximum request / response size for your proxy server. If an incoming request exceeds the limit, it is rejected and a Slack alert is triggered.
+
+#### Usage
+**Step 1.** Set `max_request_size_mb` and `max_response_size_mb`
+
+For this example, we set a very low limit on `max_request_size_mb` and expect the request to be rejected.
+
+:::info
+In production, we recommend setting `max_request_size_mb` / `max_response_size_mb` to around `32 MB`.
+
+:::
+
+```yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+general_settings:
+  master_key: sk-1234
+
+  # Security controls
+  max_request_size_mb: 0.000000001 # 👈 Key Change - Max Request Size in MB. Set this very low for testing
+  max_response_size_mb: 100 # 👈 Key Change - Max Response Size in MB
+```
+
+**Step 2.** Test it with a `/chat/completions` request
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "fake-openai-endpoint",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude!"}
+    ]
+  }'
+```
+
+**Expected Response from request**
+We expect this to fail, since the request size is over `max_request_size_mb`:
+```shell
+{"error":{"message":"Request size is too large. Request size is 0.0001125335693359375 MB. Max size is 1e-09 MB","type":"bad_request_error","param":"content-length","code":400}}
+```
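As a sanity check on the numbers in the expected response above, here is a minimal Python sketch (not part of this diff) that reproduces the proxy's bytes-to-MB conversion on a payload like the curl example. The exact byte count depends on how your client serializes the body, so treat the printed size as illustrative:

```python
# Minimal sketch (not part of this PR): reproduce the proxy's
# bytes -> MB arithmetic on a payload like the curl example above.
# Exact byte counts vary with how the client serializes the body.
import json

payload = {
    "model": "fake-openai-endpoint",
    "messages": [{"role": "user", "content": "Hello, Claude!"}],
}
body = json.dumps(payload).encode("utf-8")

size_mb = len(body) / (1024 * 1024)  # same conversion as bytes_to_mb()
max_request_size_mb = 0.000000001    # the deliberately tiny limit from the config

print(f"Request size is {size_mb} MB. Max size is {max_request_size_mb} MB")
print("rejected" if size_mb > max_request_size_mb else "accepted")
```

Even an empty JSON body is far larger than `1e-09 MB`, which is why the docs example is guaranteed to be rejected.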
+ """ + from litellm.proxy.proxy_server import general_settings, premium_user + + max_request_size_mb = general_settings.get("max_request_size_mb", None) + if max_request_size_mb is not None: + # Check if premium user + if premium_user is not True: + verbose_proxy_logger.warning( + f"using max_request_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}" + ) + return True + + # Get the request body + content_length = request.headers.get("content-length") + + if content_length: + header_size = int(content_length) + header_size_mb = bytes_to_mb(bytes_value=header_size) + verbose_proxy_logger.debug( + f"content_length request size in MB={header_size_mb}" + ) + + if header_size_mb > max_request_size_mb: + raise ProxyException( + message=f"Request size is too large. Request size is {header_size_mb} MB. Max size is {max_request_size_mb} MB", + type=ProxyErrorTypes.bad_request_error.value, + code=400, + param="content-length", + ) + else: + # If Content-Length is not available, read the body + body = await request.body() + body_size = len(body) + request_size_mb = bytes_to_mb(bytes_value=body_size) + + verbose_proxy_logger.debug( + f"request body request size in MB={request_size_mb}" + ) + if request_size_mb > max_request_size_mb: + raise ProxyException( + message=f"Request size is too large. Request size is {request_size_mb} MB. Max size is {max_request_size_mb} MB", + type=ProxyErrorTypes.bad_request_error.value, + code=400, + param="content-length", + ) + + return True + + +def bytes_to_mb(bytes_value: int): + """ + Helper to convert bytes to MB + """ + return bytes_value / (1024 * 1024) diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index d91baf5cad..8a1f97f4c3 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import ( log_to_opentelemetry, ) from litellm.proxy.auth.auth_utils import ( + check_if_request_size_is_safe, is_llm_api_route, route_in_additonal_public_routes, ) @@ -116,6 +117,21 @@ async def user_api_key_auth( try: route: str = request.url.path + ### LiteLLM Enterprise Security Checks + # Check 1. Check if request size is under max_request_size_mb + # Check 2. 
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index d91baf5cad..8a1f97f4c3 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import (
     log_to_opentelemetry,
 )
 from litellm.proxy.auth.auth_utils import (
+    check_if_request_size_is_safe,
     is_llm_api_route,
     route_in_additonal_public_routes,
 )
@@ -116,6 +117,21 @@ async def user_api_key_auth(
     try:
         route: str = request.url.path
 
+        ### LiteLLM Enterprise Security Checks
+        # Check 1. Check if request size is under max_request_size_mb
+        # Check 2. FILTER IP ADDRESS
+        await check_if_request_size_is_safe(request=request)
+
+        is_valid_ip = _check_valid_ip(
+            allowed_ips=general_settings.get("allowed_ips", None), request=request
+        )
+
+        if not is_valid_ip:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Access forbidden: IP address not allowed.",
+            )
+
         pass_through_endpoints: Optional[List[dict]] = general_settings.get(
             "pass_through_endpoints", None
         )
@@ -170,18 +186,6 @@ async def user_api_key_auth(
         ```
         """
 
-        ### FILTER IP ADDRESS ###
-
-        is_valid_ip = _check_valid_ip(
-            allowed_ips=general_settings.get("allowed_ips", None), request=request
-        )
-
-        if not is_valid_ip:
-            raise HTTPException(
-                status_code=status.HTTP_403_FORBIDDEN,
-                detail="Access forbidden: IP address not allowed.",
-            )
-
         if (
             route in LiteLLMRoutes.public_routes.value
             or route_in_additonal_public_routes(current_route=route)
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index c0045e40cb..82127046da 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -28,5 +28,10 @@ model_list:
 general_settings:
   master_key: sk-1234
 
+  # Security controls
+  max_request_size_mb: 100
+  # Google Cloud Run's maximum response size is 32MB
+  max_response_size_mb: 100
+
 litellm_settings:
   callbacks: ["otel"]
\ No newline at end of file
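Taken together, the reordered auth flow runs both enterprise checks (request size first, then the IP allowlist) before any key validation, and both read from `general_settings`. A combined configuration might look like the hedged sketch below; the `allowed_ips` key is the one consumed by `_check_valid_ip` in the moved block above, and the specific values are illustrative:

```yaml
# Hedged config sketch: both enterprise checks wired into user_api_key_auth
# read their settings from general_settings. Values are illustrative.
general_settings:
  master_key: sk-1234

  # Check 1: request/response size limits (enterprise only)
  max_request_size_mb: 32
  max_response_size_mb: 32   # e.g. Google Cloud Run caps responses at 32MB

  # Check 2: IP allowlist consumed by _check_valid_ip
  allowed_ips: ["127.0.0.1", "10.2.3.4"]
```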