From 865469e43f7da076caf6f50a363ec0bf04783598 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:25:24 -0700
Subject: [PATCH 01/10] allow setting max_file_size_mb

---
 litellm/types/router.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/litellm/types/router.py b/litellm/types/router.py
index 9c028fa87..6da80d506 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -154,6 +154,8 @@ class GenericLiteLLMParams(BaseModel):
     input_cost_per_second: Optional[float] = None
     output_cost_per_second: Optional[float] = None
 
+    max_file_size_mb: Optional[int] = None
+
     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
     def __init__(
@@ -185,6 +187,7 @@ class GenericLiteLLMParams(BaseModel):
         output_cost_per_token: Optional[float] = None,
         input_cost_per_second: Optional[float] = None,
         output_cost_per_second: Optional[float] = None,
+        max_file_size_mb: Optional[int] = None,
         **params,
     ):
         args = locals()
@@ -243,6 +246,9 @@ class LiteLLM_Params(GenericLiteLLMParams):
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
         aws_region_name: Optional[str] = None,
+        # OpenAI / Azure Whisper
+        # set a max file size that can be passed to the litellm proxy
+        max_file_size_mb: Optional[int] = None,
         **params,
     ):
         args = locals()

From e65daef572c19e4d2968be85f1ebc5cbf3fb32c3 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:27:12 -0700
Subject: [PATCH 02/10] router return get_deployment_by_model_group_name

---
 litellm/router.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/litellm/router.py b/litellm/router.py
index b27de0490..f50723ab9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3684,6 +3684,24 @@ class Router:
             raise Exception("Model invalid format - {}".format(type(model)))
         return None
 
+    def get_deployment_by_model_group_name(
+        self, model_group_name: str
+    ) -> Optional[Deployment]:
+        """
+        Returns the first deployment whose model_name matches model_group_name, or None.
+        Raises an Exception if a matching model entry is in an invalid format.
+        """
+        for model in self.model_list:
+            if isinstance(model, dict):
+                if model["model_name"] == model_group_name:
+                    return Deployment(**model)
+            elif isinstance(model, Deployment):
+                if model.model_name == model_group_name:
+                    return model
+            else:
+                raise Exception("Model name invalid - {}".format(type(model)))
+        return None
+
     def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
         """
         For a given model id, return the model info (max tokens, input cost, output cost, etc.).
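Taken together, these two patches give each deployment an optional file-size limit and a way to look it up. A minimal sketch of the intended usage once both are applied — the `whisper` deployment, API key, and limit below are illustrative placeholders, not part of the patch:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {
                "model": "whisper-1",
                "api_key": "sk-placeholder",  # illustrative key
                "max_file_size_mb": 10,       # new param from PATCH 01
            },
        }
    ]
)

# New helper from PATCH 02: resolve a deployment by its model group name
deployment = router.get_deployment_by_model_group_name(model_group_name="whisper")
if deployment is not None:
    print(deployment.litellm_params.max_file_size_mb)  # prints the configured limit
```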
From 6c060b1fdc4f69bb939eef71d93131bc1d115730 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:38:08 -0700
Subject: [PATCH 03/10] check_file_size_under_limit

---
 .../proxy/common_utils/http_parsing_utils.py | 73 ++++++++++++++++++-
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index cc2471391..fb1432551 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -1,6 +1,11 @@
+import ast
+import json
 from typing import Optional
-from fastapi import Request
-import ast, json
+
+from fastapi import Request, UploadFile, status
+
+from litellm._logging import verbose_proxy_logger
+from litellm.types.router import Deployment
 
 
 async def _read_request_body(request: Optional[Request]) -> dict:
@@ -29,3 +34,67 @@ async def _read_request_body(request: Optional[Request]) -> dict:
         return request_data
     except:
         return {}
+
+
+def check_file_size_under_limit(
+    request_data: dict,
+    file: UploadFile,
+    router_model_names: list[str],
+) -> bool:
+    """
+    Check if any files passed in the request are under max_file_size_mb
+
+    Returns True -> when the file size is under the max_file_size_mb limit
+    Raises ProxyException -> when the file size exceeds the max_file_size_mb limit, or when the caller is not a premium_user
+    """
+    from litellm.proxy.proxy_server import (
+        CommonProxyErrors,
+        ProxyException,
+        llm_router,
+        premium_user,
+    )
+
+    file_contents_size = file.size or 0
+    file_content_size_in_mb = file_contents_size / (1024 * 1024)
+    max_file_size_mb = None  # stays None when no limit is configured
+
+    if llm_router is not None and request_data["model"] in router_model_names:
+        try:
+            deployment: Optional[Deployment] = (
+                llm_router.get_deployment_by_model_group_name(
+                    model_group_name=request_data["model"]
+                )
+            )
+            if (
+                deployment
+                and deployment.litellm_params is not None
+                and deployment.litellm_params.max_file_size_mb is not None
+            ):
+                max_file_size_mb = deployment.litellm_params.max_file_size_mb
+        except Exception as e:
+            verbose_proxy_logger.error(
+                "Got error when checking file size: %s", str(e)
+            )
+
+    if max_file_size_mb is not None:
+        verbose_proxy_logger.debug(
+            "Checking file size, file content size=%s, max_file_size_mb=%s",
+            file_content_size_in_mb,
+            max_file_size_mb,
+        )
+        if not premium_user:
+            raise ProxyException(
+                message=f"Tried setting max_file_size_mb for /audio/transcriptions. {CommonProxyErrors.not_premium_user.value}",
+                code=status.HTTP_400_BAD_REQUEST,
+                type="bad_request",
+                param="file",
+            )
+        if file_content_size_in_mb > max_file_size_mb:
+            raise ProxyException(
+                message="File size is too large. Please check your file size",
+                code=status.HTTP_400_BAD_REQUEST,
+                type="bad_request",
+                param="file",
+            )
+
+    return True

From b5a2090720fd68efab283e68b6792d7f9f19de73 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:40:05 -0700
Subject: [PATCH 04/10] use helper to check check_file_size_under_limit

---
 litellm/proxy/proxy_server.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 03d35a1ce..7afa00f58 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -143,7 +143,10 @@ from litellm.proxy.common_utils.encrypt_decrypt_utils import (
     decrypt_value_helper,
     encrypt_value_helper,
 )
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm.proxy.common_utils.http_parsing_utils import (
+    _read_request_body,
+    check_file_size_under_limit,
+)
 from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
 from litellm.proxy.common_utils.openai_endpoint_utils import (
     remove_sensitive_info_from_deployment,
@@ -3796,7 +3799,13 @@ async def audio_transcriptions(
             param="file",
         )
 
-    # Instead of writing to a file
+    # Check the file size before reading the file into memory
+    check_file_size_under_limit(
+        request_data=data,
+        file=file,
+        router_model_names=router_model_names,
+    )
+
     file_content = await file.read()
     file_object = io.BytesIO(file_content)
     file_object.name = file.filename

From 48d28e37a4c040ea033dc10c135d5bb9e0c31969 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:41:38 -0700
Subject: [PATCH 05/10] fix set max_file_size

---
 litellm/proxy/proxy_config.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 762155ed9..1f9f4e496 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -7,6 +7,13 @@ model_list:
   - model_name: gemini-flash
     litellm_params:
       model: gemini/gemini-1.5-flash
+  - model_name: whisper
+    litellm_params:
+      model: whisper-1
+      api_key: sk-*******
+      max_file_size_mb: 1000
+    model_info:
+      mode: audio_transcription
 
 general_settings:
   master_key: sk-1234

From 685253568b224aee5b9e1578216910dbbc201285 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:57:17 -0700
Subject: [PATCH 06/10] doc set max file size

---
 docs/my-website/docs/text_to_speech.md | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/docs/my-website/docs/text_to_speech.md b/docs/my-website/docs/text_to_speech.md
index 73a12c434..5e5f8b990 100644
--- a/docs/my-website/docs/text_to_speech.md
+++ b/docs/my-website/docs/text_to_speech.md
@@ -109,4 +109,33 @@ response = speech(
     input="the quick brown fox jumped over the lazy dogs",
 )
 response.stream_to_file(speech_file_path)
+```
+
+## ✨ Enterprise LiteLLM Proxy - Set Max Request File Size
+
+Use this when you want to limit the file size of requests sent to `/audio/transcriptions`
+
+```yaml
+- model_name: whisper
+  litellm_params:
+    model: whisper-1
+    api_key: sk-*******
+    max_file_size_mb: 0.0001 # 👈 max file size in MB (Set this intentionally very small for testing)
+  model_info:
+    mode: audio_transcription
+```
+
+Make a test request with a valid file
+```shell
+curl --location 'http://localhost:4000/v1/audio/transcriptions' \
+--header 'Authorization: Bearer sk-1234' \
+--form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
+--form 'model="whisper"'
+```
+
+Expect to see the following response
+
+```shell
+{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":500}}
+```
\ No newline at end of file

From 38cef1c58df1cbe7666fc7e75b64916b19b1f28c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:57:33 -0700
Subject: [PATCH 07/10] fix error from max file size

---
 litellm/proxy/common_utils/http_parsing_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index fb1432551..7014b1e31 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -91,7 +91,7 @@ def check_file_size_under_limit(
         )
         if file_content_size_in_mb > max_file_size_mb:
             raise ProxyException(
-                message="File size is too large. Please check your file size",
+                message=f"File size is too large. Please check your file size. Passed file size: {file_content_size_in_mb} MB. Max file size: {max_file_size_mb} MB",
                 code=status.HTTP_400_BAD_REQUEST,
                 type="bad_request",
                 param="file",

From 0bd747ef7e0d17e5d3ffd26f2816fce5353bcf05 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 19:58:41 -0700
Subject: [PATCH 08/10] max_file_size_mb in float

---
 litellm/types/router.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/litellm/types/router.py b/litellm/types/router.py
index 6da80d506..e7b8971bc 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -154,7 +154,7 @@ class GenericLiteLLMParams(BaseModel):
     input_cost_per_second: Optional[float] = None
     output_cost_per_second: Optional[float] = None
 
-    max_file_size_mb: Optional[int] = None
+    max_file_size_mb: Optional[float] = None
 
     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
 
@@ -187,7 +187,7 @@ class GenericLiteLLMParams(BaseModel):
         output_cost_per_token: Optional[float] = None,
         input_cost_per_second: Optional[float] = None,
         output_cost_per_second: Optional[float] = None,
-        max_file_size_mb: Optional[int] = None,
+        max_file_size_mb: Optional[float] = None,
         **params,
     ):
         args = locals()
@@ -248,7 +248,7 @@ class LiteLLM_Params(GenericLiteLLMParams):
         aws_region_name: Optional[str] = None,
         # OpenAI / Azure Whisper
         # set a max file size that can be passed to the litellm proxy
-        max_file_size_mb: Optional[int] = None,
+        max_file_size_mb: Optional[float] = None,
         **params,
     ):
         args = locals()

From bac6685bfc5429850fec5b34ea4720a09de58da3 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 20:02:41 -0700
Subject: [PATCH 09/10] fix linting

---
 litellm/proxy/common_utils/http_parsing_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm/proxy/common_utils/http_parsing_utils.py
index 7014b1e31..72c901c15 100644
--- a/litellm/proxy/common_utils/http_parsing_utils.py
+++ b/litellm/proxy/common_utils/http_parsing_utils.py
@@ -1,6 +1,6 @@
 import ast
 import json
-from typing import Optional
+from typing import List, Optional
 
 from fastapi import Request, UploadFile, status
 
@@ -39,7 +39,7 @@ async def _read_request_body(request: Optional[Request]) -> dict:
 def check_file_size_under_limit(
     request_data: dict,
     file: UploadFile,
-    router_model_names: list[str],
+    router_model_names: List[str],
 ) -> bool:
     """
     Check if any files passed in the request are under max_file_size_mb

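Before the final docs patch, a standalone sketch of the core check the helper performs. This is plain Python extracted for illustration — the byte count is made up; in the proxy, the size comes from `UploadFile.size` and the limit from the matched deployment's `litellm_params.max_file_size_mb`:

```python
# Mirrors the arithmetic in check_file_size_under_limit (PATCH 03 / PATCH 07)
file_contents_size = 775_192  # bytes, as an UploadFile.size might report
file_content_size_in_mb = file_contents_size / (1024 * 1024)

max_file_size_mb = 0.5  # the deployment's configured limit, in MB

if file_content_size_in_mb > max_file_size_mb:
    # the proxy raises a ProxyException with HTTP 400 at this point
    raise ValueError(
        f"File size is too large. Passed file size: {file_content_size_in_mb} MB. "
        f"Max file size: {max_file_size_mb} MB"
    )
```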
From e1c1149a64c91a5c1d12ed04ab7ef028188808b9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 15 Jul 2024 20:05:14 -0700
Subject: [PATCH 10/10] docs set max file size on requests

---
 docs/my-website/docs/enterprise.md       | 1 +
 docs/my-website/docs/proxy/enterprise.md | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/my-website/docs/enterprise.md b/docs/my-website/docs/enterprise.md
index f33e2dda9..9f075ef35 100644
--- a/docs/my-website/docs/enterprise.md
+++ b/docs/my-website/docs/enterprise.md
@@ -27,6 +27,7 @@ This covers:
     - ✅ IP address‑based access control lists
     - ✅ Track Request IP Address
     - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
+    - ✅ Set Max Request / File Size on Requests
     - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
 - **Spend Tracking**
     - ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index f30b4b978..00703b974 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -21,6 +21,7 @@ Features:
     - ✅ IP address‑based access control lists
     - ✅ Track Request IP Address
     - ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
+    - ✅ Set Max Request / File Size on Requests
     - ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
 - **Spend Tracking**
     - ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
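End to end, the series can be exercised with any OpenAI-compatible client pointed at the proxy. A sketch, assuming a proxy running the whisper config above on localhost:4000 — the key and file path are placeholders:

```python
import openai

# Point the standard OpenAI client at the LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

with open("tests/gettysburg.wav", "rb") as audio_file:
    try:
        transcript = client.audio.transcriptions.create(
            model="whisper",
            file=audio_file,
        )
        print(transcript.text)
    except openai.APIStatusError as e:
        # With max_file_size_mb set below the file's size, expect the
        # "File size is too large" error added in PATCH 07
        print(e)
```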