forked from phoenix/litellm-mirror
Merge pull request #4928 from BerriAI/litellm_check_response_size
[Feat Enterprise] - check max response size
This commit is contained in:
commit
10e70f842d
3 changed files with 51 additions and 3 deletions
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import sys
|
||||
|
||||
from fastapi import Request
|
||||
|
||||
|
@ -88,7 +89,11 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
|
|||
request (Request): The incoming request.
|
||||
|
||||
Returns:
|
||||
bool: True if the request size is within the limit, False otherwise.
|
||||
bool: True if the request size is within the limit
|
||||
|
||||
Raises:
|
||||
ProxyException: If the request size is too large
|
||||
|
||||
"""
|
||||
from litellm.proxy.proxy_server import general_settings, premium_user
|
||||
|
||||
|
@ -138,6 +143,46 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
async def check_response_size_is_safe(response: Any) -> bool:
    """
    Enterprise Only: enforce an upper bound on proxy response size.

    Reads ``max_response_size_mb`` from ``general_settings``; when it is unset,
    or when the caller is not a premium (enterprise) user, the check is a no-op.

    Args:
        response (Any): The response object to size-check.

    Returns:
        bool: True if the response size is within the limit.

    Raises:
        ProxyException: If the response size exceeds the configured limit.
    """
    from litellm.proxy.proxy_server import general_settings, premium_user

    limit_mb = general_settings.get("max_response_size_mb", None)
    if limit_mb is None:
        # No limit configured -> nothing to enforce.
        return True

    if premium_user is not True:
        # Enterprise-gated feature: warn and allow the response through.
        verbose_proxy_logger.warning(
            f"using max_response_size_mb - not checking - this is an enterprise only feature. {CommonProxyErrors.not_premium_user.value}"
        )
        return True

    # NOTE(review): sys.getsizeof is shallow — it does not include objects the
    # response references, so this may undercount the true payload size. Confirm
    # this approximation is intentional.
    actual_mb = bytes_to_mb(bytes_value=sys.getsizeof(response))
    verbose_proxy_logger.debug(f"response size in MB={actual_mb}")

    if actual_mb > limit_mb:
        raise ProxyException(
            message=f"Response size is too large. Response size is {actual_mb} MB. Max size is {limit_mb} MB",
            type=ProxyErrorTypes.bad_request_error.value,
            code=400,
            param="content-length",
        )

    return True
|
||||
|
||||
|
||||
def bytes_to_mb(bytes_value: int):
|
||||
"""
|
||||
Helper to convert bytes to MB
|
||||
|
|
|
@ -31,7 +31,7 @@ general_settings:
|
|||
# Security controls
|
||||
max_request_size_mb: 100
|
||||
# google cloud run maximum response size is 32MB
|
||||
max_response_size_mb: 100
|
||||
max_response_size_mb: 10
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["otel"]
|
|
@ -124,6 +124,7 @@ from litellm.proxy.auth.auth_checks import (
|
|||
get_user_object,
|
||||
log_to_opentelemetry,
|
||||
)
|
||||
from litellm.proxy.auth.auth_utils import check_response_size_is_safe
|
||||
from litellm.proxy.auth.handle_jwt import JWTHandler
|
||||
from litellm.proxy.auth.litellm_license import LicenseCheck
|
||||
from litellm.proxy.auth.model_checks import (
|
||||
|
@ -3000,6 +3001,7 @@ async def chat_completion(
|
|||
**additional_headers,
|
||||
)
|
||||
)
|
||||
await check_response_size_is_safe(response=response)
|
||||
|
||||
return response
|
||||
except RejectedRequestError as e:
|
||||
|
@ -3241,7 +3243,7 @@ async def completion(
|
|||
response_cost=response_cost,
|
||||
)
|
||||
)
|
||||
|
||||
await check_response_size_is_safe(response=response)
|
||||
return response
|
||||
except RejectedRequestError as e:
|
||||
_data = e.request_data
|
||||
|
@ -3491,6 +3493,7 @@ async def embeddings(
|
|||
call_id=litellm_call_id,
|
||||
)
|
||||
)
|
||||
await check_response_size_is_safe(response=response)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue