fix(proxy_server.py): prisma client fixes for high traffic

Krrish Dholakia 2024-02-06 17:30:36 -08:00
parent d1549cb2f3
commit b6adeec347
6 changed files with 224 additions and 114 deletions


@@ -490,7 +490,7 @@ async def test_key_crossing_budget():
 @pytest.mark.asyncio
-async def test_key_zinfo_spend_values_sagemaker():
+async def test_key_info_spend_values_sagemaker():
     """
     Tests the sync streaming loop to ensure spend is correctly calculated.
     - create key


@@ -1,7 +1,7 @@
 # What this tests?
 ## Tests /spend endpoints.
-import pytest
+import pytest, time, uuid
 import asyncio
 import aiohttp
@@ -26,17 +26,17 @@ async def generate_key(session, models=[]):
         return await response.json()
 
 
-async def chat_completion(session, key):
+async def chat_completion(session, key, model="gpt-3.5-turbo"):
     url = "http://0.0.0.0:4000/chat/completions"
     headers = {
         "Authorization": f"Bearer {key}",
         "Content-Type": "application/json",
     }
     data = {
-        "model": "gpt-3.5-turbo",
+        "model": model,
         "messages": [
             {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Hello!"},
+            {"role": "user", "content": f"Hello! {uuid.uuid4()}"},
         ],
     }
@@ -53,8 +53,37 @@ async def chat_completion(session, key):
         return await response.json()
 
 
-async def get_spend_logs(session, request_id):
-    url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
+async def chat_completion_high_traffic(session, key, model="gpt-3.5-turbo"):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": f"Hello! {uuid.uuid4()}"},
+        ],
+    }
+
+    try:
+        async with session.post(url, headers=headers, json=data) as response:
+            status = response.status
+            response_text = await response.text()
+
+            if status != 200:
+                raise Exception(f"Request did not return a 200 status code: {status}")
+
+            return await response.json()
+    except Exception as e:
+        return None
+
+
+async def get_spend_logs(session, request_id=None, api_key=None):
+    if api_key is not None:
+        url = f"http://0.0.0.0:4000/spend/logs?api_key={api_key}"
+    else:
+        url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
     headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
 
     async with session.get(url, headers=headers) as response:
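
Note: the reworked get_spend_logs helper above now supports two lookup modes, either every log row for a virtual key (?api_key=...) or the single row for one request (?request_id=...). A minimal usage sketch follows; it assumes the helper is in scope and that the local proxy and master key ("sk-1234") from these tests are running, and the placeholder values are illustrative only.

import asyncio
import aiohttp

async def demo_spend_log_lookups():
    # Sketch only: exercise both lookup modes of get_spend_logs (defined above).
    # `key` and `request_id` are placeholders for values returned earlier by
    # generate_key() and chat_completion() in this module.
    async with aiohttp.ClientSession() as session:
        key = "sk-placeholder"
        request_id = "chatcmpl-placeholder"
        # all log rows written for this virtual key
        logs_for_key = await get_spend_logs(session=session, api_key=key)
        # the single log row for one request id
        log_for_request = await get_spend_logs(session=session, request_id=request_id)
        print(len(logs_for_key), log_for_request)

# asyncio.run(demo_spend_log_lookups())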
@@ -82,3 +111,53 @@ async def test_spend_logs():
         response = await chat_completion(session=session, key=key)
         await asyncio.sleep(5)
         await get_spend_logs(session=session, request_id=response["id"])
+
+
+@pytest.mark.asyncio
+async def test_spend_logs_high_traffic():
+    """
+    - Create key
+    - Make n concurrent calls (n = 1000 below)
+    - Get all logs for that key
+    - Wait 10s
+    - Assert the number of logs equals n
+    """
+
+    async def retry_request(func, *args, _max_attempts=5, **kwargs):
+        for attempt in range(_max_attempts):
+            try:
+                return await func(*args, **kwargs)
+            except (
+                aiohttp.client_exceptions.ClientOSError,
+                aiohttp.client_exceptions.ServerDisconnectedError,
+            ) as e:
+                if attempt + 1 == _max_attempts:
+                    raise  # re-raise the last ClientOSError if all attempts failed
+                print(f"Attempt {attempt+1} failed, retrying...")
+
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=600)
+    ) as session:
+        start = time.time()
+        key_gen = await generate_key(session=session)
+        key = key_gen["key"]
+        n = 1000
+        tasks = [
+            retry_request(
+                chat_completion_high_traffic,
+                session=session,
+                key=key,
+                model="azure-gpt-3.5",
+            )
+            for _ in range(n)
+        ]
+        chat_completions = await asyncio.gather(*tasks)
+        successful_completions = [c for c in chat_completions if c is not None]
+        print(f"Num successful completions: {len(successful_completions)}")
+        await asyncio.sleep(10)
+        response = await get_spend_logs(session=session, api_key=key)
+        print(f"response: {response}")
+        print(f"len responses: {len(response)}")
+        assert len(response) == n
+        print(n, time.time() - start, len(response))
+        raise Exception("it worked!")
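
Note: the high-traffic test above waits a fixed asyncio.sleep(10) before asserting that all n spend logs have been written. If that window proves flaky, a polling helper along the lines below could be used instead; wait_for_spend_logs and its parameters are hypothetical and not part of this commit, and the sketch assumes the same local proxy endpoint and master key ("sk-1234") used by the tests.

import asyncio
import time
import aiohttp

async def wait_for_spend_logs(session, api_key, expected, timeout_s=60, poll_s=2):
    # Hypothetical sketch: poll /spend/logs until `expected` rows exist for
    # `api_key`, instead of sleeping a fixed 10 seconds before asserting.
    url = f"http://0.0.0.0:4000/spend/logs?api_key={api_key}"
    headers = {"Authorization": "Bearer sk-1234"}
    rows = []
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        async with session.get(url, headers=headers) as response:
            rows = await response.json()
        if len(rows) >= expected:
            return rows
        await asyncio.sleep(poll_s)
    raise TimeoutError(f"only {len(rows)} of {expected} spend logs after {timeout_s}s")

In the test, the sleep-then-assert pair would then become rows = await wait_for_spend_logs(session, key, n) followed by assert len(rows) == n.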