Merge pull request #4926 from BerriAI/litellm_check_max_request_size

Proxy Enterprise - security - check max request size
Ishaan Jaff 2024-07-27 17:02:12 -07:00 committed by GitHub
commit 003108a074
4 changed files with 139 additions and 13 deletions


@@ -21,7 +21,7 @@ Features:
- ✅ IP address-based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ [Set Max Request Size / File Size on Requests](#set-max-request--response-size-on-litellm-proxy)
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Enterprise Spend Tracking Features**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
@@ -1288,3 +1288,52 @@ How it works?
**Note:** Setting an environment variable within a Python script using os.environ will not make that variable accessible via SSH sessions or any other new processes that are started independently of the Python script. Environment variables set this way only affect the current process and its child processes.

## Set Max Request / Response Size on LiteLLM Proxy
Use this if you want to set a maximum request / response size for your proxy server. If a request's size exceeds the limit, it is rejected and a Slack alert is triggered.

#### Usage
**Step 1.** Set `max_request_size_mb` and `max_response_size_mb`

For this example we set a very low limit on `max_request_size_mb` and expect requests to be rejected
:::info
In production we recommend setting `max_request_size_mb` / `max_response_size_mb` to around `32 MB`
:::
```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  master_key: sk-1234
  # Security controls
  max_request_size_mb: 0.000000001 # 👈 Key Change - Max Request Size in MB. Set this very low for testing
  max_response_size_mb: 100 # 👈 Key Change - Max Response Size in MB
```
**Step 2.** Test it with a `/chat/completions` request
```shell
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "user", "content": "Hello, Claude!"}
    ]
  }'
```
**Expected Response from request**

We expect this to fail since the request size is over `max_request_size_mb`
```json
{"error":{"message":"Request size is too large. Request size is 0.0001125335693359375 MB. Max size is 1e-09 MB","type":"bad_request_error","param":"content-length","code":400}}
```
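The reported size is just the request's `Content-Length` divided by `1024 * 1024` (here, exactly 118 bytes). A minimal sketch of that arithmetic (ours, not from the docs), with the byte count taken from the error above:

```python
# The proxy converts bytes to MB as bytes / (1024 * 1024).
content_length = 118  # bytes; 118 / 1048576 == 0.0001125335693359375

size_mb = content_length / (1024 * 1024)
print(size_mb > 0.000000001)  # True -> request exceeds the test limit and is rejected
```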


@@ -1,5 +1,7 @@
import re

from fastapi import Request

from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import *
@@ -75,3 +77,69 @@ def is_llm_api_route(route: str) -> bool:
            return True
    return False


async def check_if_request_size_is_safe(request: Request) -> bool:
    """
    Enterprise Only:
        - Checks if the request size is within the limit

    Args:
        request (Request): The incoming request.

    Returns:
        bool: True if the request size is within the limit, False otherwise.
    """
    from litellm.proxy.proxy_server import general_settings, premium_user

    max_request_size_mb = general_settings.get("max_request_size_mb", None)
    if max_request_size_mb is not None:
        # Check if premium user
        if premium_user is not True:
            verbose_proxy_logger.warning(
                f"using max_request_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
            )
            return True

        # Get the request body
        content_length = request.headers.get("content-length")

        if content_length:
            header_size = int(content_length)
            header_size_mb = bytes_to_mb(bytes_value=header_size)
            verbose_proxy_logger.debug(
                f"content_length request size in MB={header_size_mb}"
            )

            if header_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {header_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )
        else:
            # If Content-Length is not available, read the body
            body = await request.body()
            body_size = len(body)
            request_size_mb = bytes_to_mb(bytes_value=body_size)
            verbose_proxy_logger.debug(
                f"request body request size in MB={request_size_mb}"
            )

            if request_size_mb > max_request_size_mb:
                raise ProxyException(
                    message=f"Request size is too large. Request size is {request_size_mb} MB. Max size is {max_request_size_mb} MB",
                    type=ProxyErrorTypes.bad_request_error.value,
                    code=400,
                    param="content-length",
                )

    return True


def bytes_to_mb(bytes_value: int):
    """
    Helper to convert bytes to MB
    """
    return bytes_value / (1024 * 1024)
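A hedged usage sketch (ours, not part of the commit) of the Content-Length fast path, assuming a bare Starlette `Request` built from a raw ASGI scope:

```python
from starlette.requests import Request  # FastAPI's Request is this class
from litellm.proxy.auth.auth_utils import bytes_to_mb

# Hypothetical ASGI scope advertising a 1 MiB body via Content-Length.
scope = {
    "type": "http",
    "method": "POST",
    "path": "/v1/chat/completions",
    "headers": [(b"content-length", b"1048576")],
}
request = Request(scope)

header_size_mb = bytes_to_mb(bytes_value=int(request.headers["content-length"]))
print(header_size_mb)  # 1.0 -> allowed at a 32 MB limit, rejected at 0.5 MB
```

Note that when `Content-Length` is present the body is never read; only when the header is missing does the check fall back to `await request.body()` to measure the payload.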


@@ -57,6 +57,7 @@ from litellm.proxy.auth.auth_checks import (
    log_to_opentelemetry,
)
from litellm.proxy.auth.auth_utils import (
    check_if_request_size_is_safe,
    is_llm_api_route,
    route_in_additonal_public_routes,
)
@@ -116,6 +117,21 @@ async def user_api_key_auth(
    try:
        route: str = request.url.path

        ### LiteLLM Enterprise Security Checks
        # Check 1. Check if request size is under max_request_size_mb
        # Check 2. FILTER IP ADDRESS
        await check_if_request_size_is_safe(request=request)

        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )

        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        pass_through_endpoints: Optional[List[dict]] = general_settings.get(
            "pass_through_endpoints", None
        )
@@ -170,18 +186,6 @@ async def user_api_key_auth(
        ```
        """

        ### FILTER IP ADDRESS ###
        is_valid_ip = _check_valid_ip(
            allowed_ips=general_settings.get("allowed_ips", None), request=request
        )
        if not is_valid_ip:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access forbidden: IP address not allowed.",
            )

        if (
            route in LiteLLMRoutes.public_routes.value
            or route_in_additonal_public_routes(current_route=route)
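Because the request-size check and the IP filter now run at the very top of `user_api_key_auth` (the standalone IP-filter block was removed from its old position further down, per the second hunk), an oversized request is rejected before the API key is ever validated. A hedged client-side probe (ours, not from the commit), assuming the docs' proxy on `localhost:4000` with the tiny test limit configured:

```python
import httpx

# Hypothetical probe: even with an invalid key, an oversized request should
# come back 400 (request too large) rather than 401, since the size check
# runs before key validation.
response = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Authorization": "Bearer not-a-real-key"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
print(response.status_code)  # expect 400 when max_request_size_mb is exceeded
```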


@@ -28,5 +28,10 @@ model_list:
general_settings:
  master_key: sk-1234

  # Security controls
  max_request_size_mb: 100
  # google cloud run maximum response size is 32MB
  max_response_size_mb: 100

litellm_settings:
  callbacks: ["otel"]
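A small sketch (ours, not part of the commit) of reading those controls back out of the config, the same way the proxy consumes `general_settings` at runtime; the config filename is an assumption:

```python
import yaml  # PyYAML

# Hypothetical path to the config shown above
with open("proxy_config.yaml") as f:
    config = yaml.safe_load(f)

general_settings = config.get("general_settings", {})
print(general_settings.get("max_request_size_mb"))   # 100
print(general_settings.get("max_response_size_mb"))  # 100
```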