Merge pull request #4724 from BerriAI/litellm_Set_max_file_size_transc
[Feat] - set max file size on /audio/transcriptions
This commit is contained in: commit 254ac37f65
8 changed files with 144 additions and 5 deletions
@@ -27,6 +27,7 @@ This covers:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
@@ -21,6 +21,7 @@ Features:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
@@ -110,3 +110,32 @@ response = speech(
)
response.stream_to_file(speech_file_path)
```

## ✨ Enterprise LiteLLM Proxy - Set Max Request File Size

Use this when you want to limit the file size of requests sent to `/audio/transcriptions`.

```yaml
- model_name: whisper
  litellm_params:
    model: whisper-1
    api_key: sk-*******
    max_file_size_mb: 0.00001 # 👈 max file size in MB (Set this intentionally very small for testing)
  model_info:
    mode: audio_transcription
```

Make a test request with a valid file:

```shell
curl --location 'http://localhost:4000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
--form 'model="whisper"'
```

Expect to see the following response:

```shell
{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":500}}
```
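
The same request can also be sent from Python via the OpenAI SDK pointed at the proxy. A minimal sketch, assuming the proxy from this page is running on `http://localhost:4000`, `sk-1234` is a valid proxy key, and `tests/gettysburg.wav` exists locally; a file larger than the configured `max_file_size_mb` is rejected with the error shown above:

```python
# Hypothetical client-side call; base_url points at the LiteLLM proxy.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

with open("tests/gettysburg.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper",  # the model_name configured on the proxy
        file=audio_file,
    )

print(transcript.text)
```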
@@ -1,6 +1,11 @@
from typing import Optional
from fastapi import Request
import ast, json
import ast
import json
from typing import List, Optional

from fastapi import Request, UploadFile, status

from litellm._logging import verbose_proxy_logger
from litellm.types.router import Deployment


async def _read_request_body(request: Optional[Request]) -> dict:
@@ -29,3 +34,66 @@ async def _read_request_body(request: Optional[Request]) -> dict:
        return request_data
    except:
        return {}


def check_file_size_under_limit(
    request_data: dict,
    file: UploadFile,
    router_model_names: List[str],
) -> bool:
    """
    Check if any files passed in request are under max_file_size_mb

    Returns True -> when file size is under max_file_size_mb limit
    Raises ProxyException -> when file size is over max_file_size_mb limit or not a premium_user
    """
    from litellm.proxy.proxy_server import (
        CommonProxyErrors,
        ProxyException,
        llm_router,
        premium_user,
    )

    file_contents_size = file.size or 0
    file_content_size_in_mb = file_contents_size / (1024 * 1024)

    if llm_router is not None and request_data["model"] in router_model_names:
        try:
            deployment: Optional[Deployment] = (
                llm_router.get_deployment_by_model_group_name(
                    model_group_name=request_data["model"]
                )
            )
            if (
                deployment
                and deployment.litellm_params is not None
                and deployment.litellm_params.max_file_size_mb is not None
            ):
                max_file_size_mb = deployment.litellm_params.max_file_size_mb
        except Exception as e:
            verbose_proxy_logger.error(
                "Got error when checking file size: %s", (str(e))
            )

    if max_file_size_mb is not None:
        verbose_proxy_logger.debug(
            "Checking file size, file content size=%s, max_file_size_mb=%s",
            file_content_size_in_mb,
            max_file_size_mb,
        )
        if not premium_user:
            raise ProxyException(
                message=f"Tried setting max_file_size_mb for /audio/transcriptions. {CommonProxyErrors.not_premium_user.value}",
                code=status.HTTP_400_BAD_REQUEST,
                type="bad_request",
                param="file",
            )
        if file_content_size_in_mb > max_file_size_mb:
            raise ProxyException(
                message=f"File size is too large. Please check your file size. Passed file size: {file_content_size_in_mb} MB. Max file size: {max_file_size_mb} MB",
                code=status.HTTP_400_BAD_REQUEST,
                type="bad_request",
                param="file",
            )

    return True
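
For intuition, here is a tiny standalone sketch (not the proxy's helper itself) of the core comparison: convert the upload's size in bytes to MB and compare it against the configured limit, treating a missing limit as "no check". The byte count below is an assumption chosen to roughly match the ~0.739 MB example file from the docs section above.

```python
from typing import Optional


def is_under_limit(file_size_bytes: int, max_file_size_mb: Optional[float]) -> bool:
    """Return True when no limit is configured or the file fits under it."""
    if max_file_size_mb is None:
        return True
    file_size_mb = file_size_bytes / (1024 * 1024)
    return file_size_mb <= max_file_size_mb


assert is_under_limit(775_193, max_file_size_mb=1000) is True      # well under 1000 MB
assert is_under_limit(775_193, max_file_size_mb=0.00001) is False  # tiny test limit from the docs
assert is_under_limit(775_193, max_file_size_mb=None) is True      # no limit configured
```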
@@ -7,6 +7,13 @@ model_list:
  - model_name: gemini-flash
    litellm_params:
      model: gemini/gemini-1.5-flash
  - model_name: whisper
    litellm_params:
      model: whisper-1
      api_key: sk-*******
      max_file_size_mb: 1000
    model_info:
      mode: audio_transcription

general_settings:
  master_key: sk-1234
@@ -143,7 +143,10 @@ from litellm.proxy.common_utils.encrypt_decrypt_utils import (
    decrypt_value_helper,
    encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.http_parsing_utils import (
    _read_request_body,
    check_file_size_under_limit,
)
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import (
    remove_sensitive_info_from_deployment,
@@ -3796,7 +3799,13 @@ async def audio_transcriptions(
                param="file",
            )

        # Instead of writing to a file
        # Check if File can be read in memory before reading
        check_file_size_under_limit(
            request_data=data,
            file=file,
            router_model_names=router_model_names,
        )

        file_content = await file.read()
        file_object = io.BytesIO(file_content)
        file_object.name = file.filename
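
The ordering matters here: the check runs against `file.size` before `file.read()` pulls the whole upload into memory, which is what the inline comments above describe. A minimal, hypothetical FastAPI endpoint (not `proxy_server.py` itself, with a hard-coded `MAX_MB` standing in for the per-deployment `max_file_size_mb` lookup) illustrating the same validate-then-read flow:

```python
# Standalone sketch of the validate-then-read pattern used above.
import io

from fastapi import FastAPI, HTTPException, UploadFile, status

app = FastAPI()
MAX_MB = 1.0  # assumed limit, in MB


@app.post("/v1/audio/transcriptions")
async def transcriptions(file: UploadFile):
    size_mb = (file.size or 0) / (1024 * 1024)
    if size_mb > MAX_MB:
        # Reject before reading the upload body into memory
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File size is too large: {size_mb} MB > {MAX_MB} MB",
        )
    file_content = await file.read()
    file_object = io.BytesIO(file_content)
    file_object.name = file.filename
    return {"filename": file.filename, "size_mb": size_mb}
```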
@@ -3684,6 +3684,24 @@ class Router:
            raise Exception("Model invalid format - {}".format(type(model)))
        return None

    def get_deployment_by_model_group_name(
        self, model_group_name: str
    ) -> Optional[Deployment]:
        """
        Returns -> Deployment or None

        Raise Exception -> if model found in invalid format
        """
        for model in self.model_list:
            if model["model_name"] == model_group_name:
                if isinstance(model, dict):
                    return Deployment(**model)
                elif isinstance(model, Deployment):
                    return model
                else:
                    raise Exception("Model Name invalid - {}".format(type(model)))
        return None

    def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
        """
        For a given model id, return the model info (max tokens, input cost, output cost, etc.).
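
A rough usage sketch of the new lookup. The model entry mirrors the proxy config above; the placeholder key and the assumption that constructing a `Router` locally makes no network call are mine:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {
                "model": "whisper-1",
                "api_key": "sk-*******",  # placeholder, as in the config above
                "max_file_size_mb": 1000,
            },
            "model_info": {"mode": "audio_transcription"},
        }
    ]
)

deployment = router.get_deployment_by_model_group_name(model_group_name="whisper")
if deployment is not None and deployment.litellm_params.max_file_size_mb is not None:
    print(deployment.litellm_params.max_file_size_mb)  # 1000.0
```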
@@ -154,6 +154,8 @@ class GenericLiteLLMParams(BaseModel):
    input_cost_per_second: Optional[float] = None
    output_cost_per_second: Optional[float] = None

    max_file_size_mb: Optional[float] = None

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def __init__(
@@ -185,6 +187,7 @@ class GenericLiteLLMParams(BaseModel):
        output_cost_per_token: Optional[float] = None,
        input_cost_per_second: Optional[float] = None,
        output_cost_per_second: Optional[float] = None,
        max_file_size_mb: Optional[float] = None,
        **params,
    ):
        args = locals()
@@ -243,6 +246,9 @@ class LiteLLM_Params(GenericLiteLLMParams):
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region_name: Optional[str] = None,
        # OpenAI / Azure Whisper
        # set a max-size of file that can be passed to litellm proxy
        max_file_size_mb: Optional[float] = None,
        **params,
    ):
        args = locals()
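
Since `max_file_size_mb` is now a field on these params models, it can also be set programmatically. A small sketch with illustrative values:

```python
from litellm.types.router import LiteLLM_Params

params = LiteLLM_Params(
    model="whisper-1",
    api_key="sk-*******",    # placeholder
    max_file_size_mb=1000,
)
print(params.max_file_size_mb)  # 1000.0 (stored as Optional[float])
```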