fix(proxy_server.py): prisma client fixes for high traffic

2024-02-06 17:30:36 -08:00 · 2024-02-06 17:30:36 -08:00 · b6adeec347
commit b6adeec347
parent d1549cb2f3
6 changed files with 224 additions and 114 deletions
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@ -490,7 +490,7 @@ async def test_key_crossing_budget():


@pytest.mark.asyncio
-async def test_key_zinfo_spend_values_sagemaker():
+async def test_key_info_spend_values_sagemaker():
    """
    Tests the sync streaming loop to ensure spend is correctly calculated.
    - create key
--- a/tests/test_spend_logs.py
+++ b/tests/test_spend_logs.py
@ -1,7 +1,7 @@
 # What this tests?
 ## Tests /spend endpoints.

-import pytest
+import pytest, time, uuid
 import asyncio
 import aiohttp

@ -26,17 +26,17 @@ async def generate_key(session, models=[]):
        return await response.json()


-async def chat_completion(session, key):
+async def chat_completion(session, key, model="gpt-3.5-turbo"):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
-        "model": "gpt-3.5-turbo",
+        "model": model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Hello!"},
+            {"role": "user", "content": f"Hello! {uuid.uuid4()}"},
        ],
    }

@ -53,8 +53,37 @@ async def chat_completion(session, key):
        return await response.json()


-async def get_spend_logs(session, request_id):
-    url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
+async def chat_completion_high_traffic(session, key, model="gpt-3.5-turbo"):
+    """
+    Best-effort chat completion call, meant to be fired many times concurrently.
+
+    POSTs a single /chat/completions request with a unique user message
+    (a fresh uuid4 is appended to the prompt).
+
+    Returns the parsed JSON body on a 200 response, or None if the request
+    failed for any reason. Failures are printed instead of raised, so one bad
+    request does not abort the whole batch but still shows up in the output.
+    """
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": f"Hello! {uuid.uuid4()}"},
+        ],
+    }
+    try:
+        async with session.post(url, headers=headers, json=data) as response:
+            status = response.status
+            # Read the body once up front; aiohttp keeps it in memory, so the
+            # `response.json()` call below parses the same payload.
+            response_text = await response.text()
+
+            if status != 200:
+                # Surface the error body - otherwise the reason is lost below
+                print(response_text)
+                raise Exception(f"Request did not return a 200 status code: {status}")
+
+            return await response.json()
+    except Exception as e:
+        # Deliberate best-effort: report the failure and signal it with None
+        print(f"chat_completion_high_traffic failed: {e}")
+        return None
+
+
+async def get_spend_logs(session, request_id=None, api_key=None):
+    if api_key is not None:
+        url = f"http://0.0.0.0:4000/spend/logs?api_key={api_key}"
+    else:
+        url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}

    async with session.get(url, headers=headers) as response:
@ -82,3 +111,53 @@ async def test_spend_logs():
        response = await chat_completion(session=session, key=key)
        await asyncio.sleep(5)
        await get_spend_logs(session=session, request_id=response["id"])
+
+
+@pytest.mark.asyncio
+async def test_spend_logs_high_traffic():
+    """
+    - Create key
+    - Make n (1000) concurrent calls
+    - Wait 10s
+    - Get all logs for that key
+    - Assert there are n of them
+    """
+
+    async def retry_request(func, *args, _max_attempts=5, **kwargs):
+        # Retries only on connection-level errors; anything else propagates
+        for attempt in range(_max_attempts):
+            try:
+                return await func(*args, **kwargs)
+            except (
+                aiohttp.client_exceptions.ClientOSError,
+                aiohttp.client_exceptions.ServerDisconnectedError,
+            ):
+                if attempt + 1 == _max_attempts:
+                    raise  # re-raise the last connection error if all attempts failed
+                print(f"Attempt {attempt+1} failed, retrying...")
+
+    async with aiohttp.ClientSession(
+        timeout=aiohttp.ClientTimeout(total=600)
+    ) as session:
+        start = time.time()
+        key_gen = await generate_key(session=session)
+        key = key_gen["key"]
+        n = 1000
+        tasks = [
+            retry_request(
+                chat_completion_high_traffic,
+                session=session,
+                key=key,
+                model="azure-gpt-3.5",
+            )
+            for _ in range(n)
+        ]
+        chat_completions = await asyncio.gather(*tasks)
+        successful_completions = [c for c in chat_completions if c is not None]
+        print(f"Num successful completions: {len(successful_completions)}")
+        # NOTE(review): presumably gives the proxy time to write the spend
+        # logs before they are queried - confirm the required delay
+        await asyncio.sleep(10)
+        response = await get_spend_logs(session=session, api_key=key)
+        print(f"response: {response}")
+        print(f"len responses: {len(response)}")
+        # Print timing before asserting so it is visible on failure too
+        print(n, time.time() - start, len(response))
+        assert len(response) == n