diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md
index f33e2dda9..9f075ef35 100644
--- a/docs/my-website/docs/enterprise.md
+++ b/docs/my-website/docs/enterprise.md
@@ -27,6 +27,7 @@ This covers:
   - ✅ IP address‑based access control lists
   - ✅ Track Request IP Address
   - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
+  - ✅ Set Max Request / File Size on Requests
   - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
 - **Spend Tracking**
   - ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index ef06c665c..507c7f693 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -21,6 +21,7 @@ Features:
   - ✅ IP address‑based access control lists
   - ✅ Track Request IP Address
   - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
+  - ✅ Set Max Request / File Size on Requests
   - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
 - **Spend Tracking**
   - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
diff --git a/docs/my-website/docs/text_to_speech.md b/docs/my-website/docs/text_to_speech.md
index 73a12c434..5e5f8b990 100644
--- a/docs/my-website/docs/text_to_speech.md
+++ b/docs/my-website/docs/text_to_speech.md
@@ -109,4 +109,33 @@ response = speech(
     input="the quick brown fox jumped over the lazy dogs",
 )
 response.stream_to_file(speech_file_path)
+```
+
+## ✨ Enterprise LiteLLM Proxy - Set Max Request File Size
+
+Use this when you want to limit the file size of requests sent to `audio/transcriptions`.
+
+```yaml
+- model_name: whisper
+  litellm_params:
+    model: whisper-1
+    api_key: sk-*******
+    max_file_size_mb: 0.0001 # 👈 max file size in MB (set intentionally very small for testing)
+  model_info:
+    mode: audio_transcription
+```
+
+Make a test request with a valid audio file:
+```shell
+curl --location 'http://localhost:4000/v1/audio/transcriptions' \
+--header 'Authorization: Bearer sk-1234' \
+--form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
+--form 'model="whisper"'
+```
+
+
+Expect to see the following response:
+
+```shell
+{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":400}}
+```
\ No newline at end of file
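
For reviewers who prefer the OpenAI SDK over raw `curl`, here is a minimal sketch of the same test request. This is not part of the patch; it assumes the proxy from the docs example is running on `localhost:4000`, that `sk-1234` is a valid key, and that the `whisper` model group is configured with the tiny `max_file_size_mb` shown above:

```python
# Hypothetical client-side repro of the docs example above.
# Assumes: proxy on localhost:4000, key sk-1234, and the `whisper`
# model group configured with a deliberately tiny max_file_size_mb.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

with open("tests/gettysburg.wav", "rb") as audio_file:
    try:
        transcript = client.audio.transcriptions.create(
            model="whisper",
            file=audio_file,
        )
        print(transcript.text)
    except Exception as e:
        # Expect a 400 "File size is too large" error whenever the
        # uploaded file exceeds the configured max_file_size_mb.
        print(e)
```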
diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index cc2471391..72c901c15 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -1,6 +1,11 @@
-from typing import Optional
-from fastapi import Request
-import ast, json
+import ast
+import json
+from typing import List, Optional
+
+from fastapi import Request, UploadFile, status
+
+from litellm._logging import verbose_proxy_logger
+from litellm.types.router import Deployment
 
 
 async def _read_request_body(request: Optional[Request]) -> dict:
@@ -29,3 +34,67 @@
         return request_data
     except:
         return {}
+
+
+def check_file_size_under_limit(
+    request_data: dict,
+    file: UploadFile,
+    router_model_names: List[str],
+) -> bool:
+    """
+    Check if any files passed in the request are under max_file_size_mb
+
+    Returns True -> when file size is under the max_file_size_mb limit
+    Raises ProxyException -> when file size is over the max_file_size_mb limit or the caller is not a premium_user
+    """
+    from litellm.proxy.proxy_server import (
+        CommonProxyErrors,
+        ProxyException,
+        llm_router,
+        premium_user,
+    )
+
+    file_contents_size = file.size or 0
+    file_content_size_in_mb = file_contents_size / (1024 * 1024)
+    max_file_size_mb: Optional[float] = None  # stays None unless the deployment sets a limit
+
+    if llm_router is not None and request_data["model"] in router_model_names:
+        try:
+            deployment: Optional[Deployment] = (
+                llm_router.get_deployment_by_model_group_name(
+                    model_group_name=request_data["model"]
+                )
+            )
+            if (
+                deployment
+                and deployment.litellm_params is not None
+                and deployment.litellm_params.max_file_size_mb is not None
+            ):
+                max_file_size_mb = deployment.litellm_params.max_file_size_mb
+        except Exception as e:
+            verbose_proxy_logger.error(
+                "Got error when checking file size: %s", str(e)
+            )
+
+    if max_file_size_mb is not None:
+        verbose_proxy_logger.debug(
+            "Checking file size, file content size=%s, max_file_size_mb=%s",
+            file_content_size_in_mb,
+            max_file_size_mb,
+        )
+        if not premium_user:
+            raise ProxyException(
+                message=f"Tried setting max_file_size_mb for /audio/transcriptions. {CommonProxyErrors.not_premium_user.value}",
+                code=status.HTTP_400_BAD_REQUEST,
+                type="bad_request",
+                param="file",
+            )
+        if file_content_size_in_mb > max_file_size_mb:
+            raise ProxyException(
+                message=f"File size is too large. Please check your file size. Passed file size: {file_content_size_in_mb} MB. Max file size: {max_file_size_mb} MB",
+                code=status.HTTP_400_BAD_REQUEST,
+                type="bad_request",
+                param="file",
+            )
+
+    return True
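
As a sanity check on the MB conversion above (a standalone sketch, not part of the patch): the `0.7392807006835938 MB` figure from the docs example corresponds to a 775,192-byte file, which the deliberately tiny `0.0001` MB limit rejects.

```python
# Standalone sketch of the size math used by check_file_size_under_limit.
# 775_192 is the assumed byte size of the gettysburg.wav file from the
# docs example; it reproduces the 0.7392807006835938 MB figure above.
file_contents_size = 775_192  # bytes, i.e. what UploadFile.size reports
file_content_size_in_mb = file_contents_size / (1024 * 1024)

max_file_size_mb = 0.0001  # the tiny test limit from the docs config
print(file_content_size_in_mb)                     # 0.7392807006835938
print(file_content_size_in_mb > max_file_size_mb)  # True -> request rejected
```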
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 762155ed9..1f9f4e496 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -7,6 +7,13 @@ model_list:
   - model_name: gemini-flash
     litellm_params:
       model: gemini/gemini-1.5-flash
+  - model_name: whisper
+    litellm_params:
+      model: whisper-1
+      api_key: sk-*******
+      max_file_size_mb: 1000
+    model_info:
+      mode: audio_transcription
 
 general_settings:
   master_key: sk-1234
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index eaf0b7b58..9dc735d46 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -143,7 +143,10 @@ from litellm.proxy.common_utils.encrypt_decrypt_utils import (
     decrypt_value_helper,
     encrypt_value_helper,
 )
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm.proxy.common_utils.http_parsing_utils import (
+    _read_request_body,
+    check_file_size_under_limit,
+)
 from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
 from litellm.proxy.common_utils.openai_endpoint_utils import (
     remove_sensitive_info_from_deployment,
@@ -3796,7 +3799,13 @@ async def audio_transcriptions(
             param="file",
         )
 
-    # Instead of writing to a file
+    # Check the file size before reading the file into memory
+    check_file_size_under_limit(
+        request_data=data,
+        file=file,
+        router_model_names=router_model_names,
+    )
+
     file_content = await file.read()
     file_object = io.BytesIO(file_content)
     file_object.name = file.filename
diff --git a/litellm/router.py b/litellm/router.py
index b27de0490..f50723ab9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3684,6 +3684,24 @@ class Router:
             raise Exception("Model invalid format - {}".format(type(model)))
         return None
 
+    def get_deployment_by_model_group_name(
+        self, model_group_name: str
+    ) -> Optional[Deployment]:
+        """
+        Returns -> Deployment or None
+
+        Raises Exception -> if a model entry is in an invalid format
+        """
+        for model in self.model_list:
+            if isinstance(model, dict) and model.get("model_name") == model_group_name:
+                return Deployment(**model)
+            elif isinstance(model, Deployment) and model.model_name == model_group_name:
+                return model
+            elif not isinstance(model, (dict, Deployment)):
+                raise Exception("Model invalid format - {}".format(type(model)))
+        return None
+
     def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
         """
         For a given model id, return the model info (max tokens, input cost, output cost, etc.).
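
A usage sketch for the new `Router.get_deployment_by_model_group_name` helper (illustrative, not from the patch; the model list mirrors the whisper entry added to `proxy_config.yaml` above, with a placeholder API key):

```python
# Illustrative lookup of a deployment's max_file_size_mb via the new
# helper added in this diff. The model list mirrors proxy_config.yaml.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {
                "model": "whisper-1",
                "api_key": "sk-*******",  # placeholder, as in the config
                "max_file_size_mb": 1000,
            },
        }
    ]
)

deployment = router.get_deployment_by_model_group_name(model_group_name="whisper")
if deployment is not None:
    print(deployment.litellm_params.max_file_size_mb)  # 1000.0
```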
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 9c028fa87..e7b8971bc 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -154,6 +154,8 @@ class GenericLiteLLMParams(BaseModel):
     input_cost_per_second: Optional[float] = None
     output_cost_per_second: Optional[float] = None
 
+    max_file_size_mb: Optional[float] = None
+
     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
     def __init__(
@@ -185,6 +187,7 @@ class GenericLiteLLMParams(BaseModel):
         output_cost_per_token: Optional[float] = None,
         input_cost_per_second: Optional[float] = None,
         output_cost_per_second: Optional[float] = None,
+        max_file_size_mb: Optional[float] = None,
         **params,
     ):
         args = locals()
@@ -243,6 +246,9 @@ class LiteLLM_Params(GenericLiteLLMParams):
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
         aws_region_name: Optional[str] = None,
+        # OpenAI / Azure Whisper
+        # set the max size of a file that can be passed to the litellm proxy
+        max_file_size_mb: Optional[float] = None,
         **params,
     ):
         args = locals()
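
Since `max_file_size_mb` is an ordinary optional float on `GenericLiteLLMParams`, it can also be set programmatically rather than via YAML; a minimal sketch, assuming only the constructor signature shown in this patch:

```python
# Sketch: the new field behaves like any other optional litellm_params
# value, so it can be constructed directly as well as loaded from config.
from litellm.types.router import LiteLLM_Params

params = LiteLLM_Params(model="whisper-1", max_file_size_mb=1000)
print(params.max_file_size_mb)  # 1000.0
```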