forked from phoenix/litellm-mirror

fix(parallel_request_limiter.py): handle tpm/rpm limits being null

parent 62e35f6a14
commit 1a29272b47

5 changed files with 100 additions and 8 deletions
@@ -132,8 +132,8 @@ class GenerateKeyRequest(LiteLLMBase):
     team_id: Optional[str] = None
     max_parallel_requests: Optional[int] = None
     metadata: Optional[dict] = {}
-    tpm_limit: int = sys.maxsize
-    rpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
 
 
 class UpdateKeyRequest(LiteLLMBase):
@@ -148,8 +148,8 @@ class UpdateKeyRequest(LiteLLMBase):
     user_id: Optional[str] = None
     max_parallel_requests: Optional[int] = None
     metadata: Optional[dict] = None
-    tpm_limit: int = sys.maxsize
-    rpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
 
 
 class UserAPIKeyAuth(LiteLLMBase):  # the expected response object for user api key auth
@@ -166,8 +166,8 @@ class UserAPIKeyAuth(LiteLLMBase):  # the expected response object for user api
     max_parallel_requests: Optional[int] = None
     duration: str = "1h"
     metadata: dict = {}
-    tpm_limit: int = sys.maxsize
-    rpm_limit: int = sys.maxsize
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
 
 
 class GenerateKeyResponse(LiteLLMBase):
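All three models get the same two-field change: with the old `int = sys.maxsize` defaults, an unset limit was indistinguishable from an explicitly huge one, while `Optional[int] = None` lets downstream code see that no limit was configured at all. A minimal standalone sketch of the difference (the models below are stand-ins for illustration, not the actual litellm classes):

import sys
from typing import Optional

from pydantic import BaseModel


class OldKeyRequest(BaseModel):
    # old defaults: an omitted limit silently becomes sys.maxsize
    tpm_limit: int = sys.maxsize
    rpm_limit: int = sys.maxsize


class NewKeyRequest(BaseModel):
    # new defaults: an omitted limit stays None, so "not configured"
    # is distinguishable from "configured to a very large number"
    tpm_limit: Optional[int] = None
    rpm_limit: Optional[int] = None


print(OldKeyRequest().tpm_limit)  # 9223372036854775807 on 64-bit CPython
print(NewKeyRequest().tpm_limit)  # None -- callers must now handle this case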
@@ -29,8 +29,8 @@ class MaxParallelRequestsHandler(CustomLogger):
         self.print_verbose(f"Inside Max Parallel Request Pre-Call Hook")
         api_key = user_api_key_dict.api_key
         max_parallel_requests = user_api_key_dict.max_parallel_requests or sys.maxsize
-        tpm_limit = user_api_key_dict.tpm_limit
-        rpm_limit = user_api_key_dict.rpm_limit
+        tpm_limit = user_api_key_dict.tpm_limit or sys.maxsize
+        rpm_limit = user_api_key_dict.rpm_limit or sys.maxsize
 
         if api_key is None:
             return
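This hunk is the actual bug fix. The hook later compares running token/request counts against these limits (not shown in this hunk), and in Python 3 comparing an int to None raises a TypeError, so a key whose limits were left null would crash the pre-call check. Coalescing None to sys.maxsize restores the old "effectively unlimited" behavior. A minimal sketch of the failure mode and the fix (the counter value here is made up for illustration):

import sys

current_tpm = 1_500  # hypothetical running token count for this key
tpm_limit = None     # key was generated with no tpm_limit set

# before the fix: a direct comparison against a possibly-null limit
try:
    current_tpm >= tpm_limit
except TypeError as e:
    print(f"old behavior raises: {e}")
    # '>=' not supported between instances of 'int' and 'NoneType'

# after the fix: None is coalesced to sys.maxsize, i.e. "no limit"
tpm_limit = tpm_limit or sys.maxsize
print(current_tpm >= tpm_limit)  # False -- the request is allowed through

Note that `or` also maps an explicit limit of 0 to sys.maxsize; an `is None` check would preserve a zero limit, but the `or` form matches the existing `max_parallel_requests` line directly above it in the diff.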
tests/README.MD (new file, 1 line)

@@ -0,0 +1 @@
+Most tests are in `/litellm/tests`. These are just the tests for the proxy docker image, used for circle ci.
tests/test_chat_completion.py (new file, 58 lines)

@@ -0,0 +1,58 @@
+# What this tests ?
+## Tests /chat/completions by generating a key and then making a chat completions request
+import pytest
+import asyncio
+import aiohttp
+
+
+async def generate_key(session):
+    url = "http://0.0.0.0:4000/key/generate"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {
+        "models": ["gpt-4"],
+        "duration": None,
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+        return await response.json()
+
+
+async def chat_completion(session, key):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": "gpt-4",
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"},
+        ],
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+
+@pytest.mark.asyncio
+async def test_key_gen():
+    async with aiohttp.ClientSession() as session:
+        key_gen = await generate_key(session=session)
+        key = key_gen["key"]
+        await chat_completion(session=session, key=key)
tests/test_parallel_key_gen.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+# What this tests ?
+## Tests /key/generate by making 10 parallel requests, and asserting all are successful
+import pytest
+import asyncio
+import aiohttp
+
+
+async def generate_key(session, i):
+    url = "http://0.0.0.0:4000/key/generate"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {
+        "models": ["azure-models"],
+        "aliases": {"mistral-7b": "gpt-3.5-turbo"},
+        "duration": None,
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(f"Response {i} (Status code: {status}):")
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request {i} did not return a 200 status code: {status}")
+
+
+@pytest.mark.asyncio
+async def test_key_gen():
+    async with aiohttp.ClientSession() as session:
+        tasks = [generate_key(session, i) for i in range(1, 11)]
+        await asyncio.gather(*tasks)
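Both new tests assume a proxy already running locally on port 4000 with master key sk-1234 (per the README above, these are the proxy docker image tests used in CI), so with the proxy up they can be run directly, e.g. `pytest tests/test_parallel_key_gen.py`. Since neither test sets tpm_limit or rpm_limit when generating keys, the keys come back with null limits under the new schema, which is exactly the case this commit's limiter fix handles.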