Merge pull request #4724 from BerriAI/litellm_Set_max_file_size_transc
[Feat] - set max file size on /audio/transcriptions
This commit is contained in: commit 254ac37f65
8 changed files with 144 additions and 5 deletions
@@ -27,6 +27,7 @@ This covers:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](./proxy/pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](./proxy/enterprise#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](./proxy/enterprise#tracking-spend-for-custom-tags)
@@ -21,6 +21,7 @@ Features:
- ✅ IP address‑based access control lists
- ✅ Track Request IP Address
- ✅ [Use LiteLLM keys/authentication on Pass Through Endpoints](pass_through#✨-enterprise---use-litellm-keysauthentication-on-pass-through-endpoints)
- ✅ Set Max Request / File Size on Requests
- ✅ [Enforce Required Params for LLM Requests (ex. Reject requests missing ["metadata"]["generation_name"])](#enforce-required-params-for-llm-requests)
- **Spend Tracking**
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
@@ -110,3 +110,32 @@ response = speech(
)
response.stream_to_file(speech_file_path)
```

## ✨ Enterprise LiteLLM Proxy - Set Max Request File Size

Use this when you want to limit the file size of requests sent to `/audio/transcriptions`.

```yaml
- model_name: whisper
  litellm_params:
    model: whisper-1
    api_key: sk-*******
    max_file_size_mb: 0.00001 # 👈 max file size in MB (Set this intentionally very small for testing)
  model_info:
    mode: audio_transcription
```

Make a test request with a valid file:

```shell
curl --location 'http://localhost:4000/v1/audio/transcriptions' \
--header 'Authorization: Bearer sk-1234' \
--form 'file=@"/Users/ishaanjaffer/Github/litellm/tests/gettysburg.wav"' \
--form 'model="whisper"'
```

Expect to see the following response:

```shell
{"error":{"message":"File size is too large. Please check your file size. Passed file size: 0.7392807006835938 MB. Max file size: 0.0001 MB","type":"bad_request","param":"file","code":500}}
```
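
The same request can also be sent from Python via the OpenAI SDK pointed at the proxy. A minimal sketch, assuming the proxy from this page is running on `http://localhost:4000`, `sk-1234` is a valid proxy key, and `tests/gettysburg.wav` exists locally; a file larger than the configured `max_file_size_mb` is rejected with the error shown above:

```python
# Hypothetical client-side call; base_url points at the LiteLLM proxy.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

with open("tests/gettysburg.wav", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper",  # the model_name configured on the proxy
        file=audio_file,
    )

print(transcript.text)
```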
@@ -1,6 +1,11 @@
from typing import Optional
from fastapi import Request
import ast, json
import ast
import json
from typing import List, Optional

from fastapi import Request, UploadFile, status

from litellm._logging import verbose_proxy_logger
from litellm.types.router import Deployment


async def _read_request_body(request: Optional[Request]) -> dict:
@@ -29,3 +34,66 @@ async def _read_request_body(request: Optional[Request]) -> dict:
        return request_data
    except:
        return {}


def check_file_size_under_limit(
    request_data: dict,
    file: UploadFile,
    router_model_names: List[str],
) -> bool:
    """
    Check if any files passed in request are under max_file_size_mb

    Returns True -> when file size is under max_file_size_mb limit
    Raises ProxyException -> when file size is over max_file_size_mb limit or not a premium_user
    """
    from litellm.proxy.proxy_server import (
        CommonProxyErrors,
        ProxyException,
        llm_router,
        premium_user,
    )

    file_contents_size = file.size or 0
    file_content_size_in_mb = file_contents_size / (1024 * 1024)

    if llm_router is not None and request_data["model"] in router_model_names:
        try:
            deployment: Optional[Deployment] = (
                llm_router.get_deployment_by_model_group_name(
                    model_group_name=request_data["model"]
                )
            )
            if (
                deployment
                and deployment.litellm_params is not None
                and deployment.litellm_params.max_file_size_mb is not None
            ):
                max_file_size_mb = deployment.litellm_params.max_file_size_mb
        except Exception as e:
            verbose_proxy_logger.error(
                "Got error when checking file size: %s", (str(e))
            )

    if max_file_size_mb is not None:
        verbose_proxy_logger.debug(
            "Checking file size, file content size=%s, max_file_size_mb=%s",
            file_content_size_in_mb,
            max_file_size_mb,
        )
        if not premium_user:
            raise ProxyException(
                message=f"Tried setting max_file_size_mb for /audio/transcriptions. {CommonProxyErrors.not_premium_user.value}",
                code=status.HTTP_400_BAD_REQUEST,
                type="bad_request",
                param="file",
            )
        if file_content_size_in_mb > max_file_size_mb:
            raise ProxyException(
                message=f"File size is too large. Please check your file size. Passed file size: {file_content_size_in_mb} MB. Max file size: {max_file_size_mb} MB",
                code=status.HTTP_400_BAD_REQUEST,
                type="bad_request",
                param="file",
            )

    return True
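
For intuition, here is a tiny standalone sketch (not the proxy's helper itself) of the core comparison: convert the upload's size in bytes to MB and compare it against the configured limit, treating a missing limit as "no check". The byte count below is an assumption chosen to roughly match the ~0.739 MB example file from the docs section above.

```python
from typing import Optional


def is_under_limit(file_size_bytes: int, max_file_size_mb: Optional[float]) -> bool:
    """Return True when no limit is configured or the file fits under it."""
    if max_file_size_mb is None:
        return True
    file_size_mb = file_size_bytes / (1024 * 1024)
    return file_size_mb <= max_file_size_mb


assert is_under_limit(775_193, max_file_size_mb=1000) is True      # well under 1000 MB
assert is_under_limit(775_193, max_file_size_mb=0.00001) is False  # tiny test limit from the docs
assert is_under_limit(775_193, max_file_size_mb=None) is True      # no limit configured
```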
@@ -7,6 +7,13 @@ model_list:
  - model_name: gemini-flash
    litellm_params:
      model: gemini/gemini-1.5-flash
  - model_name: whisper
    litellm_params:
      model: whisper-1
      api_key: sk-*******
      max_file_size_mb: 1000
    model_info:
      mode: audio_transcription

general_settings:
  master_key: sk-1234
@@ -143,7 +143,10 @@ from litellm.proxy.common_utils.encrypt_decrypt_utils import (
    decrypt_value_helper,
    encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.http_parsing_utils import (
    _read_request_body,
    check_file_size_under_limit,
)
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import (
    remove_sensitive_info_from_deployment,
@@ -3796,7 +3799,13 @@ async def audio_transcriptions(
                param="file",
            )

        # Instead of writing to a file
        # Check if File can be read in memory before reading
        check_file_size_under_limit(
            request_data=data,
            file=file,
            router_model_names=router_model_names,
        )

        file_content = await file.read()
        file_object = io.BytesIO(file_content)
        file_object.name = file.filename
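
The ordering matters here: the check runs against `file.size` before `file.read()` pulls the whole upload into memory, which is what the inline comments above describe. A minimal, hypothetical FastAPI endpoint (not `proxy_server.py` itself, with a hard-coded `MAX_MB` standing in for the per-deployment `max_file_size_mb` lookup) illustrating the same validate-then-read flow:

```python
# Standalone sketch of the validate-then-read pattern used above.
import io

from fastapi import FastAPI, HTTPException, UploadFile, status

app = FastAPI()
MAX_MB = 1.0  # assumed limit, in MB


@app.post("/v1/audio/transcriptions")
async def transcriptions(file: UploadFile):
    size_mb = (file.size or 0) / (1024 * 1024)
    if size_mb > MAX_MB:
        # Reject before reading the upload body into memory
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File size is too large: {size_mb} MB > {MAX_MB} MB",
        )
    file_content = await file.read()
    file_object = io.BytesIO(file_content)
    file_object.name = file.filename
    return {"filename": file.filename, "size_mb": size_mb}
```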
@@ -3684,6 +3684,24 @@ class Router:
            raise Exception("Model invalid format - {}".format(type(model)))
        return None

    def get_deployment_by_model_group_name(
        self, model_group_name: str
    ) -> Optional[Deployment]:
        """
        Returns -> Deployment or None

        Raise Exception -> if model found in invalid format
        """
        for model in self.model_list:
            if model["model_name"] == model_group_name:
                if isinstance(model, dict):
                    return Deployment(**model)
                elif isinstance(model, Deployment):
                    return model
                else:
                    raise Exception("Model Name invalid - {}".format(type(model)))
        return None

    def get_router_model_info(self, deployment: dict) -> ModelMapInfo:
        """
        For a given model id, return the model info (max tokens, input cost, output cost, etc.).
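
A rough usage sketch of the new lookup. The model entry mirrors the proxy config above; the placeholder key and the assumption that constructing a `Router` locally makes no network call are mine:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "whisper",
            "litellm_params": {
                "model": "whisper-1",
                "api_key": "sk-*******",  # placeholder, as in the config above
                "max_file_size_mb": 1000,
            },
            "model_info": {"mode": "audio_transcription"},
        }
    ]
)

deployment = router.get_deployment_by_model_group_name(model_group_name="whisper")
if deployment is not None and deployment.litellm_params.max_file_size_mb is not None:
    print(deployment.litellm_params.max_file_size_mb)  # 1000.0
```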
@@ -154,6 +154,8 @@ class GenericLiteLLMParams(BaseModel):
    input_cost_per_second: Optional[float] = None
    output_cost_per_second: Optional[float] = None

    max_file_size_mb: Optional[float] = None

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def __init__(
@@ -185,6 +187,7 @@ class GenericLiteLLMParams(BaseModel):
        output_cost_per_token: Optional[float] = None,
        input_cost_per_second: Optional[float] = None,
        output_cost_per_second: Optional[float] = None,
        max_file_size_mb: Optional[float] = None,
        **params,
    ):
        args = locals()
@@ -243,6 +246,9 @@ class LiteLLM_Params(GenericLiteLLMParams):
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region_name: Optional[str] = None,
        # OpenAI / Azure Whisper
        # set a max-size of file that can be passed to litellm proxy
        max_file_size_mb: Optional[float] = None,
        **params,
    ):
        args = locals()
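
Since `max_file_size_mb` is now a field on these params models, it can also be set programmatically. A small sketch with illustrative values:

```python
from litellm.types.router import LiteLLM_Params

params = LiteLLM_Params(
    model="whisper-1",
    api_key="sk-*******",    # placeholder
    max_file_size_mb=1000,
)
print(params.max_file_size_mb)  # 1000.0 (stored as Optional[float])
```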