forked from phoenix/litellm-mirror
fix(parallel_request_limiter.py): handle tpm/rpm limits being null
This commit is contained in:
parent
62e35f6a14
commit
1a29272b47
5 changed files with 100 additions and 8 deletions
|
@ -132,8 +132,8 @@ class GenerateKeyRequest(LiteLLMBase):
|
||||||
team_id: Optional[str] = None
|
team_id: Optional[str] = None
|
||||||
max_parallel_requests: Optional[int] = None
|
max_parallel_requests: Optional[int] = None
|
||||||
metadata: Optional[dict] = {}
|
metadata: Optional[dict] = {}
|
||||||
tpm_limit: int = sys.maxsize
|
tpm_limit: Optional[int] = None
|
||||||
rpm_limit: int = sys.maxsize
|
rpm_limit: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class UpdateKeyRequest(LiteLLMBase):
|
class UpdateKeyRequest(LiteLLMBase):
|
||||||
|
@ -148,8 +148,8 @@ class UpdateKeyRequest(LiteLLMBase):
|
||||||
user_id: Optional[str] = None
|
user_id: Optional[str] = None
|
||||||
max_parallel_requests: Optional[int] = None
|
max_parallel_requests: Optional[int] = None
|
||||||
metadata: Optional[dict] = None
|
metadata: Optional[dict] = None
|
||||||
tpm_limit: int = sys.maxsize
|
tpm_limit: Optional[int] = None
|
||||||
rpm_limit: int = sys.maxsize
|
rpm_limit: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class UserAPIKeyAuth(LiteLLMBase): # the expected response object for user api key auth
|
class UserAPIKeyAuth(LiteLLMBase): # the expected response object for user api key auth
|
||||||
|
@ -166,8 +166,8 @@ class UserAPIKeyAuth(LiteLLMBase): # the expected response object for user api
|
||||||
max_parallel_requests: Optional[int] = None
|
max_parallel_requests: Optional[int] = None
|
||||||
duration: str = "1h"
|
duration: str = "1h"
|
||||||
metadata: dict = {}
|
metadata: dict = {}
|
||||||
tpm_limit: int = sys.maxsize
|
tpm_limit: Optional[int] = None
|
||||||
rpm_limit: int = sys.maxsize
|
rpm_limit: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class GenerateKeyResponse(LiteLLMBase):
|
class GenerateKeyResponse(LiteLLMBase):
|
||||||
|
|
|
@ -29,8 +29,8 @@ class MaxParallelRequestsHandler(CustomLogger):
|
||||||
self.print_verbose(f"Inside Max Parallel Request Pre-Call Hook")
|
self.print_verbose(f"Inside Max Parallel Request Pre-Call Hook")
|
||||||
api_key = user_api_key_dict.api_key
|
api_key = user_api_key_dict.api_key
|
||||||
max_parallel_requests = user_api_key_dict.max_parallel_requests or sys.maxsize
|
max_parallel_requests = user_api_key_dict.max_parallel_requests or sys.maxsize
|
||||||
tpm_limit = user_api_key_dict.tpm_limit
|
tpm_limit = user_api_key_dict.tpm_limit or sys.maxsize
|
||||||
rpm_limit = user_api_key_dict.rpm_limit
|
rpm_limit = user_api_key_dict.rpm_limit or sys.maxsize
|
||||||
|
|
||||||
if api_key is None:
|
if api_key is None:
|
||||||
return
|
return
|
||||||
|
|
1
tests/README.MD
Normal file
1
tests/README.MD
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Most tests are in `/litellm/tests`. The tests in this directory cover only the proxy Docker image and are run in CircleCI.
|
58
tests/test_chat_completion.py
Normal file
58
tests/test_chat_completion.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
# What does this test?
|
||||||
|
## Tests /chat/completions by generating a key and then making a chat completions request
|
||||||
|
import pytest
|
||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_key(session):
|
||||||
|
url = "http://0.0.0.0:4000/key/generate"
|
||||||
|
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||||
|
data = {
|
||||||
|
"models": ["gpt-4"],
|
||||||
|
"duration": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
|
status = response.status
|
||||||
|
response_text = await response.text()
|
||||||
|
|
||||||
|
print(response_text)
|
||||||
|
print()
|
||||||
|
|
||||||
|
if status != 200:
|
||||||
|
raise Exception(f"Request did not return a 200 status code: {status}")
|
||||||
|
return await response.json()
|
||||||
|
|
||||||
|
|
||||||
|
async def chat_completion(session, key):
|
||||||
|
url = "http://0.0.0.0:4000/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"model": "gpt-4",
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": "You are a helpful assistant."},
|
||||||
|
{"role": "user", "content": "Hello!"},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
|
status = response.status
|
||||||
|
response_text = await response.text()
|
||||||
|
|
||||||
|
print(response_text)
|
||||||
|
print()
|
||||||
|
|
||||||
|
if status != 200:
|
||||||
|
raise Exception(f"Request did not return a 200 status code: {status}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_key_gen():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
key_gen = await generate_key(session=session)
|
||||||
|
key = key_gen["key"]
|
||||||
|
await chat_completion(session=session, key=key)
|
33
tests/test_parallel_key_gen.py
Normal file
33
tests/test_parallel_key_gen.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
# What does this test?
|
||||||
|
## Tests /key/generate by making 10 parallel requests, and asserting all are successful
|
||||||
|
import pytest
|
||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_key(session, i):
|
||||||
|
url = "http://0.0.0.0:4000/key/generate"
|
||||||
|
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||||
|
data = {
|
||||||
|
"models": ["azure-models"],
|
||||||
|
"aliases": {"mistral-7b": "gpt-3.5-turbo"},
|
||||||
|
"duration": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
|
status = response.status
|
||||||
|
response_text = await response.text()
|
||||||
|
|
||||||
|
print(f"Response {i} (Status code: {status}):")
|
||||||
|
print(response_text)
|
||||||
|
print()
|
||||||
|
|
||||||
|
if status != 200:
|
||||||
|
raise Exception(f"Request {i} did not return a 200 status code: {status}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_key_gen():
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
tasks = [generate_key(session, i) for i in range(1, 11)]
|
||||||
|
await asyncio.gather(*tasks)
|
Loading…
Add table
Add a link
Reference in a new issue