fix(parallel_request_limiter.py): handle tpm/rpm limits being null

2024-01-19 10:22:27 -08:00 · 2024-01-19 10:22:27 -08:00 · 1a29272b47
commit 1a29272b47
parent 62e35f6a14
5 changed files with 100 additions and 8 deletions
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -132,8 +132,8 @@ class GenerateKeyRequest(LiteLLMBase):
    team_id: Optional[str] = None
    max_parallel_requests: Optional[int] = None
    metadata: Optional[dict] = {}
-    tpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
-    rpm_limit: int = sys.maxsize
+    rpm_limit: Optional[int] = None
 class UpdateKeyRequest(LiteLLMBase):
@ -148,8 +148,8 @@ class UpdateKeyRequest(LiteLLMBase):
    user_id: Optional[str] = None
    max_parallel_requests: Optional[int] = None
    metadata: Optional[dict] = None
-    tpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
-    rpm_limit: int = sys.maxsize
+    rpm_limit: Optional[int] = None
 class UserAPIKeyAuth(LiteLLMBase):  # the expected response object for user api key auth
@ -166,8 +166,8 @@ class UserAPIKeyAuth(LiteLLMBase):  # the expected response object for user api
    max_parallel_requests: Optional[int] = None
    duration: str = "1h"
    metadata: dict = {}
-    tpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
-    rpm_limit: int = sys.maxsize
+    rpm_limit: Optional[int] = None
 class GenerateKeyResponse(LiteLLMBase):
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@ -29,8 +29,8 @@ class MaxParallelRequestsHandler(CustomLogger):
        self.print_verbose(f"Inside Max Parallel Request Pre-Call Hook")
        api_key = user_api_key_dict.api_key
        max_parallel_requests = user_api_key_dict.max_parallel_requests or sys.maxsize
-        tpm_limit = user_api_key_dict.tpm_limit
+        tpm_limit = user_api_key_dict.tpm_limit or sys.maxsize
-        rpm_limit = user_api_key_dict.rpm_limit
+        rpm_limit = user_api_key_dict.rpm_limit or sys.maxsize
        if api_key is None:
            return
--- a/tests/README.MD
+++ b/tests/README.MD
@ -0,0 +1 @@
 Most tests are in `/litellm/tests`. These are just the tests for the proxy Docker image, used for CircleCI.
--- a/tests/test_chat_completion.py
+++ b/tests/test_chat_completion.py
@ -0,0 +1,58 @@
 # What does this test?
 ## Tests /chat/completions by generating a key and then making a chat completions request
 import pytest
 import asyncio
 import aiohttp
 async def generate_key(session):
    """
    POST to the proxy's /key/generate endpoint and return the parsed JSON body.

    Parameters:
        session: client session whose `post(...)` is used as an async context
            manager; the test in this file passes an `aiohttp.ClientSession`.

    Returns:
        The value of `await response.json()` for the key-generation response.

    Raises:
        Exception: if the response status code is not 200.
    """
    url = "http://0.0.0.0:4000/key/generate"
    # NOTE(review): "sk-1234" is presumably the proxy's master key - confirm against the proxy config.
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": ["gpt-4"],
        # NOTE(review): duration=None presumably requests a key without expiry - confirm.
        "duration": None,
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        # The body is printed before the status check, so it shows up in the
        # output even when the check below raises.
        print(response_text)
        print()
        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
        # The body was already read by text() above; aiohttp keeps the read
        # payload on the response, so json() parses that same body.
        return await response.json()
 async def chat_completion(session, key):
    """
    POST a fixed two-message chat request to /chat/completions using `key`.

    Parameters:
        session: client session whose `post(...)` is used as an async context
            manager; the test in this file passes an `aiohttp.ClientSession`.
        key: API key placed in the `Authorization: Bearer ...` header.

    Returns:
        None. The response body is only printed, not returned.

    Raises:
        Exception: if the response status code is not 200.
    """
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "gpt-4",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        # Printed before the status check so the body is visible in the output
        # even when the check below raises.
        print(response_text)
        print()
        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")
@pytest.mark.asyncio
 async def test_key_gen():
    """
    Generate a key via /key/generate, then use it for one /chat/completions call.

    Both helpers raise on a non-200 status, so the test fails if either
    request is rejected.
    """
    async with aiohttp.ClientSession() as session:
        key_gen = await generate_key(session=session)
        # The key-generation response is expected to carry the new key under "key".
        key = key_gen["key"]
        await chat_completion(session=session, key=key)
--- a/tests/test_parallel_key_gen.py
+++ b/tests/test_parallel_key_gen.py
@ -0,0 +1,33 @@
 # What does this test?
 ## Tests /key/generate by making 10 parallel requests, and asserting all are successful
 import pytest
 import asyncio
 import aiohttp
 async def generate_key(session, i):
    """
    POST one /key/generate request and raise if it does not return 200.

    Parameters:
        session: client session whose `post(...)` is used as an async context
            manager; the test in this file passes an `aiohttp.ClientSession`.
        i: request number, used only to label the printed output and the
            error message.

    Returns:
        None. The response body is only printed, not returned.

    Raises:
        Exception: if the response status code is not 200.
    """
    url = "http://0.0.0.0:4000/key/generate"
    # NOTE(review): "sk-1234" is presumably the proxy's master key - confirm against the proxy config.
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
    data = {
        "models": ["azure-models"],
        "aliases": {"mistral-7b": "gpt-3.5-turbo"},
        # NOTE(review): duration=None presumably requests a key without expiry - confirm.
        "duration": None,
    }
    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()
        # Printed before the status check so each response is visible in the
        # output even when the check below raises.
        print(f"Response {i} (Status code: {status}):")
        print(response_text)
        print()
        if status != 200:
            raise Exception(f"Request {i} did not return a 200 status code: {status}")
@pytest.mark.asyncio
 async def test_key_gen():
    """
    Issue 10 /key/generate requests concurrently over one shared session.

    `generate_key` raises on any non-200 status; `asyncio.gather` (with its
    default `return_exceptions=False`) propagates the first such exception,
    which fails the test.
    """
    async with aiohttp.ClientSession() as session:
        # Requests are numbered 1..10; the number only labels the output.
        tasks = [generate_key(session, i) for i in range(1, 11)]
        await asyncio.gather(*tasks)
		`@ -0,0 +1 @@`
							Most tests are in `/litellm/tests`. These are just the tests for the proxy docker image, used for circle ci.