# Patch summary (text before the first "diff --git" header is not part of any hunk).
#
# What this patch does:
#   * litellm/proxy/_types.py: adds optional `tpm_limit` / `rpm_limit` fields to
#     GenerateKeyRequest and UpdateKeyRequest.
#   * litellm/proxy/proxy_cli.py: after a failed subprocess call in run_server,
#     sleeps a random 1-4 seconds (randrange's `stop` is exclusive).
#   * litellm/proxy/proxy_server.py: generate_key_helper_fn accepts `tpm_limit` /
#     `rpm_limit`, falls back to sys.maxsize when they are None, and writes them
#     into both `user_data` and `key_data`; `max_parallel_requests` is now also
#     written into `user_data`. generate_key_fn drops its `request: Request`
#     parameter.
#   * litellm/proxy/schema.prisma and schema.prisma: add nullable
#     `max_parallel_requests` (user table only), `tpm_limit` and `rpm_limit`
#     columns; the proxy copy also drops `@default([])` from `models`.
#
# Review changes relative to the patch as originally submitted:
#   1. Request models declare `Optional[int] = None` instead of
#      `int = sys.maxsize`. This matches the helper's own signature, keeps the
#      "unlimited" fallback in one place, and means an UpdateKeyRequest that
#      omits the limits no longer carries sys.maxsize values.
#      NOTE(review): presumably the update handler skips None fields - confirm.
#   2. The helper uses an explicit `is None` check, so an explicit limit of 0 is
#      stored as 0 rather than being replaced by sys.maxsize.
#
# NOTE(review): the original text had its line breaks and indentation collapsed;
# context-line whitespace below is reconstructed. If a hunk is rejected, retry
# with whitespace-insensitive matching. The `index` lines are kept from the
# original, so the post-image hashes for _types.py and proxy_server.py are stale.
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 771e8526c..25c9d170a 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -131,6 +131,8 @@ class GenerateKeyRequest(LiteLLMBase):
     user_id: Optional[str] = None
     max_parallel_requests: Optional[int] = None
     metadata: Optional[dict] = {}
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
 
 
 class UpdateKeyRequest(LiteLLMBase):
@@ -145,6 +147,8 @@ class UpdateKeyRequest(LiteLLMBase):
     user_id: Optional[str] = None
     max_parallel_requests: Optional[int] = None
     metadata: Optional[dict] = None
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
 
 
 class UserAPIKeyAuth(LiteLLMBase):  # the expected response object for user api key auth
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 19c8e1b7e..584ad98c1 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -418,6 +418,7 @@ def run_server(
                         break  # Exit the loop if the subprocess succeeds
                     except subprocess.CalledProcessError as e:
                         print(f"Error: {e}")
+                        time.sleep(random.randrange(start=1, stop=5))
                     finally:
                         os.chdir(original_dir)
             else:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 10c968b1c..b873b85a4 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1040,6 +1040,8 @@ async def generate_key_helper_fn(
     user_email: Optional[str] = None,
     max_parallel_requests: Optional[int] = None,
     metadata: Optional[dict] = {},
+    tpm_limit: Optional[int] = None,
+    rpm_limit: Optional[int] = None,
 ):
     global prisma_client, custom_db_client
 
@@ -1080,6 +1082,8 @@ async def generate_key_helper_fn(
     config_json = json.dumps(config)
     metadata_json = json.dumps(metadata)
     user_id = user_id or str(uuid.uuid4())
+    tpm_limit = sys.maxsize if tpm_limit is None else tpm_limit
+    rpm_limit = sys.maxsize if rpm_limit is None else rpm_limit
     try:
         # Create a new verification token (you may want to enhance this logic based on your needs)
         user_data = {
@@ -1088,6 +1092,9 @@ async def generate_key_helper_fn(
             "user_id": user_id,
             "spend": spend,
             "models": models,
+            "max_parallel_requests": max_parallel_requests,
+            "tpm_limit": tpm_limit,
+            "rpm_limit": rpm_limit,
         }
         key_data = {
             "token": token,
@@ -1099,6 +1106,8 @@ async def generate_key_helper_fn(
             "user_id": user_id,
             "max_parallel_requests": max_parallel_requests,
             "metadata": metadata_json,
+            "tpm_limit": tpm_limit,
+            "rpm_limit": rpm_limit,
         }
         if prisma_client is not None:
             ## CREATE USER (If necessary)
@@ -2032,7 +2041,6 @@ async def image_generation(
     response_model=GenerateKeyResponse,
 )
 async def generate_key_fn(
-    request: Request,
     data: GenerateKeyRequest,
     Authorization: Optional[str] = Header(None),
 ):
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index aa45a8818..19e22dec3 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -12,7 +12,10 @@ model LiteLLM_UserTable {
   max_budget Float?
   spend Float @default(0.0)
   user_email String?
-  models String[] @default([])
+  models String[]
+  max_parallel_requests Int?
+  tpm_limit BigInt?
+  rpm_limit BigInt?
 }
 
 // required for token gen
@@ -20,12 +23,14 @@ model LiteLLM_VerificationToken {
   token String @unique
   spend Float @default(0.0)
   expires DateTime?
-  models String[] @default([])
+  models String[]
   aliases Json @default("{}")
   config Json @default("{}")
   user_id String?
   max_parallel_requests Int?
   metadata Json @default("{}")
+  tpm_limit BigInt?
+  rpm_limit BigInt?
 }
 
 model LiteLLM_Config {
diff --git a/schema.prisma b/schema.prisma
index 704ada42c..19e22dec3 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -13,6 +13,9 @@ model LiteLLM_UserTable {
   spend Float @default(0.0)
   user_email String?
   models String[]
+  max_parallel_requests Int?
+  tpm_limit BigInt?
+  rpm_limit BigInt?
 }
 
 // required for token gen
@@ -26,6 +29,8 @@ model LiteLLM_VerificationToken {
   user_id String?
   max_parallel_requests Int?
   metadata Json @default("{}")
+  tpm_limit BigInt?
+  rpm_limit BigInt?
 }
 
 model LiteLLM_Config {