mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #1860 from BerriAI/litellm_spend_logging_high_traffic
fix(proxy_server.py): prisma client fixes for high traffic
This commit is contained in:
commit
f785aee0df
7 changed files with 253 additions and 112 deletions
|
@ -113,7 +113,7 @@ class LangFuseLogger:
|
||||||
elif response_obj is not None:
|
elif response_obj is not None:
|
||||||
input = prompt
|
input = prompt
|
||||||
output = response_obj["choices"][0]["message"].json()
|
output = response_obj["choices"][0]["message"].json()
|
||||||
print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
|
print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
|
||||||
if self._is_langfuse_v2():
|
if self._is_langfuse_v2():
|
||||||
self._log_langfuse_v2(
|
self._log_langfuse_v2(
|
||||||
user_id,
|
user_id,
|
||||||
|
|
|
@ -391,6 +391,10 @@ class LiteLLM_SpendLogs(LiteLLMBase):
|
||||||
startTime: Union[str, datetime, None]
|
startTime: Union[str, datetime, None]
|
||||||
endTime: Union[str, datetime, None]
|
endTime: Union[str, datetime, None]
|
||||||
user: Optional[str] = ""
|
user: Optional[str] = ""
|
||||||
metadata: Optional[Json] = {}
|
metadata: Optional[dict] = {}
|
||||||
cache_hit: Optional[str] = "False"
|
cache_hit: Optional[str] = "False"
|
||||||
cache_key: Optional[str] = None
|
cache_key: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase):
|
||||||
|
response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None
|
||||||
|
|
|
@ -5,6 +5,7 @@ import random
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import importlib
|
import importlib
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
import urllib.parse as urlparse
|
||||||
|
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
|
|
||||||
|
@ -17,6 +18,15 @@ import shutil
|
||||||
telemetry = None
|
telemetry = None
|
||||||
|
|
||||||
|
|
||||||
|
def append_query_params(url, params):
|
||||||
|
parsed_url = urlparse.urlparse(url)
|
||||||
|
parsed_query = urlparse.parse_qs(parsed_url.query)
|
||||||
|
parsed_query.update(params)
|
||||||
|
encoded_query = urlparse.urlencode(parsed_query, doseq=True)
|
||||||
|
modified_url = urlparse.urlunparse(parsed_url._replace(query=encoded_query))
|
||||||
|
return modified_url
|
||||||
|
|
||||||
|
|
||||||
def run_ollama_serve():
|
def run_ollama_serve():
|
||||||
try:
|
try:
|
||||||
command = ["ollama", "serve"]
|
command = ["ollama", "serve"]
|
||||||
|
@ -416,6 +426,12 @@ def run_server(
|
||||||
|
|
||||||
if os.getenv("DATABASE_URL", None) is not None:
|
if os.getenv("DATABASE_URL", None) is not None:
|
||||||
try:
|
try:
|
||||||
|
### add connection pool + pool timeout args
|
||||||
|
params = {"connection_limit": 500, "pool_timeout": 60}
|
||||||
|
database_url = os.getenv("DATABASE_URL")
|
||||||
|
modified_url = append_query_params(database_url, params)
|
||||||
|
os.environ["DATABASE_URL"] = modified_url
|
||||||
|
###
|
||||||
subprocess.run(["prisma"], capture_output=True)
|
subprocess.run(["prisma"], capture_output=True)
|
||||||
is_prisma_runnable = True
|
is_prisma_runnable = True
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
@ -522,6 +538,5 @@ def run_server(
|
||||||
).run() # Run gunicorn
|
).run() # Run gunicorn
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
run_server()
|
run_server()
|
||||||
|
|
|
@ -436,10 +436,7 @@ async def user_api_key_auth(
|
||||||
## Check 2.1 If global proxy is in budget
|
## Check 2.1 If global proxy is in budget
|
||||||
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
|
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
|
||||||
if valid_token.user_id is not None:
|
if valid_token.user_id is not None:
|
||||||
user_id_list = [
|
user_id_list = [valid_token.user_id, litellm_proxy_budget_name]
|
||||||
valid_token.user_id,
|
|
||||||
litellm_proxy_budget_name,
|
|
||||||
]
|
|
||||||
if (
|
if (
|
||||||
litellm.max_user_budget is not None
|
litellm.max_user_budget is not None
|
||||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
||||||
|
@ -448,17 +445,36 @@ async def user_api_key_auth(
|
||||||
if user_passed_to_chat_completions is not None:
|
if user_passed_to_chat_completions is not None:
|
||||||
user_id_list.append(user_passed_to_chat_completions)
|
user_id_list.append(user_passed_to_chat_completions)
|
||||||
|
|
||||||
|
user_id_information = None
|
||||||
|
for id in user_id_list:
|
||||||
|
value = user_api_key_cache.get_cache(key=id)
|
||||||
|
if value is not None:
|
||||||
|
if user_id_information is None:
|
||||||
|
user_id_information = []
|
||||||
|
user_id_information.append(value)
|
||||||
|
if user_id_information is None or (
|
||||||
|
isinstance(user_id_information, list)
|
||||||
|
and len(user_id_information) < 2
|
||||||
|
):
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
user_id_information = await prisma_client.get_data(
|
user_id_information = await prisma_client.get_data(
|
||||||
user_id_list=user_id_list,
|
user_id_list=[
|
||||||
|
valid_token.user_id,
|
||||||
|
litellm_proxy_budget_name,
|
||||||
|
],
|
||||||
table_name="user",
|
table_name="user",
|
||||||
query_type="find_all",
|
query_type="find_all",
|
||||||
)
|
)
|
||||||
|
for _id in user_id_information:
|
||||||
|
user_api_key_cache.set_cache(
|
||||||
|
key=_id["user_id"], value=_id, ttl=600
|
||||||
|
)
|
||||||
if custom_db_client is not None:
|
if custom_db_client is not None:
|
||||||
user_id_information = await custom_db_client.get_data(
|
user_id_information = await custom_db_client.get_data(
|
||||||
key=valid_token.user_id, table_name="user"
|
key=valid_token.user_id, table_name="user"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
f"user_id_information: {user_id_information}"
|
f"user_id_information: {user_id_information}"
|
||||||
)
|
)
|
||||||
|
@ -566,7 +582,7 @@ async def user_api_key_auth(
|
||||||
api_key = valid_token.token
|
api_key = valid_token.token
|
||||||
|
|
||||||
# Add hashed token to cache
|
# Add hashed token to cache
|
||||||
user_api_key_cache.set_cache(key=api_key, value=valid_token, ttl=60)
|
user_api_key_cache.set_cache(key=api_key, value=valid_token, ttl=600)
|
||||||
valid_token_dict = _get_pydantic_json_dict(valid_token)
|
valid_token_dict = _get_pydantic_json_dict(valid_token)
|
||||||
valid_token_dict.pop("token", None)
|
valid_token_dict.pop("token", None)
|
||||||
"""
|
"""
|
||||||
|
@ -856,6 +872,10 @@ async def update_database(
|
||||||
f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}"
|
f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
### [TODO] STEP 1: GET KEY + USER SPEND ### (key, user)
|
||||||
|
|
||||||
|
### [TODO] STEP 2: UPDATE SPEND ### (key, user, spend logs)
|
||||||
|
|
||||||
### UPDATE USER SPEND ###
|
### UPDATE USER SPEND ###
|
||||||
async def _update_user_db():
|
async def _update_user_db():
|
||||||
"""
|
"""
|
||||||
|
@ -885,12 +905,23 @@ async def update_database(
|
||||||
existing_spend_obj = LiteLLM_UserTable(
|
existing_spend_obj = LiteLLM_UserTable(
|
||||||
user_id=id, spend=0, max_budget=max_user_budget, user_email=None
|
user_id=id, spend=0, max_budget=max_user_budget, user_email=None
|
||||||
)
|
)
|
||||||
|
if existing_spend_obj is None:
|
||||||
|
existing_spend = 0
|
||||||
|
existing_spend_obj = LiteLLM_UserTable(
|
||||||
|
user_id=id, spend=0, max_budget=None, user_email=None
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
existing_spend = existing_spend_obj.spend
|
existing_spend = existing_spend_obj.spend
|
||||||
|
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
# Calculate the new cost by adding the existing cost and response_cost
|
||||||
existing_spend_obj.spend = existing_spend + response_cost
|
existing_spend_obj.spend = existing_spend + response_cost
|
||||||
|
|
||||||
|
valid_token = user_api_key_cache.get_cache(key=id)
|
||||||
|
if valid_token is not None and isinstance(valid_token, dict):
|
||||||
|
user_api_key_cache.set_cache(
|
||||||
|
key=id, value=existing_spend_obj.json()
|
||||||
|
)
|
||||||
|
|
||||||
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
|
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
|
||||||
data_list.append(existing_spend_obj)
|
data_list.append(existing_spend_obj)
|
||||||
|
|
||||||
|
@ -904,9 +935,12 @@ async def update_database(
|
||||||
await custom_db_client.update_data(
|
await custom_db_client.update_data(
|
||||||
key=user_id, value={"spend": new_spend}, table_name="user"
|
key=user_id, value={"spend": new_spend}, table_name="user"
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.info(f"Update User DB call failed to execute")
|
||||||
|
|
||||||
### UPDATE KEY SPEND ###
|
### UPDATE KEY SPEND ###
|
||||||
async def _update_key_db():
|
async def _update_key_db():
|
||||||
|
try:
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
f"adding spend to key db. Response cost: {response_cost}. Token: {token}."
|
f"adding spend to key db. Response cost: {response_cost}. Token: {token}."
|
||||||
)
|
)
|
||||||
|
@ -925,7 +959,9 @@ async def update_database(
|
||||||
|
|
||||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||||
# Update the cost column for the given token
|
# Update the cost column for the given token
|
||||||
await prisma_client.update_data(token=token, data={"spend": new_spend})
|
await prisma_client.update_data(
|
||||||
|
token=token, data={"spend": new_spend}
|
||||||
|
)
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=token)
|
valid_token = user_api_key_cache.get_cache(key=token)
|
||||||
if valid_token is not None:
|
if valid_token is not None:
|
||||||
|
@ -956,9 +992,12 @@ async def update_database(
|
||||||
if valid_token is not None:
|
if valid_token is not None:
|
||||||
valid_token.spend = new_spend
|
valid_token.spend = new_spend
|
||||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
user_api_key_cache.set_cache(key=token, value=valid_token)
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.info(f"Update Key DB Call failed to execute")
|
||||||
|
|
||||||
### UPDATE SPEND LOGS ###
|
### UPDATE SPEND LOGS ###
|
||||||
async def _insert_spend_log_to_db():
|
async def _insert_spend_log_to_db():
|
||||||
|
try:
|
||||||
# Helper to generate payload to log
|
# Helper to generate payload to log
|
||||||
verbose_proxy_logger.debug("inserting spend log to db")
|
verbose_proxy_logger.debug("inserting spend log to db")
|
||||||
payload = get_logging_payload(
|
payload = get_logging_payload(
|
||||||
|
@ -974,6 +1013,8 @@ async def update_database(
|
||||||
|
|
||||||
elif custom_db_client is not None:
|
elif custom_db_client is not None:
|
||||||
await custom_db_client.insert_data(payload, table_name="spend")
|
await custom_db_client.insert_data(payload, table_name="spend")
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.info(f"Update Spend Logs DB failed to execute")
|
||||||
|
|
||||||
asyncio.create_task(_update_user_db())
|
asyncio.create_task(_update_user_db())
|
||||||
asyncio.create_task(_update_key_db())
|
asyncio.create_task(_update_key_db())
|
||||||
|
@ -1563,7 +1604,7 @@ async def generate_key_helper_fn(
|
||||||
user_api_key_cache.set_cache(
|
user_api_key_cache.set_cache(
|
||||||
key=hashed_token,
|
key=hashed_token,
|
||||||
value=LiteLLM_VerificationToken(**saved_token), # type: ignore
|
value=LiteLLM_VerificationToken(**saved_token), # type: ignore
|
||||||
ttl=60,
|
ttl=600,
|
||||||
)
|
)
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
## CREATE USER (If necessary)
|
## CREATE USER (If necessary)
|
||||||
|
@ -1963,7 +2004,7 @@ async def startup_event():
|
||||||
### START BUDGET SCHEDULER ###
|
### START BUDGET SCHEDULER ###
|
||||||
scheduler = AsyncIOScheduler()
|
scheduler = AsyncIOScheduler()
|
||||||
interval = random.randint(
|
interval = random.randint(
|
||||||
7, 14
|
597, 605
|
||||||
) # random interval, so multiple workers avoid resetting budget at the same time
|
) # random interval, so multiple workers avoid resetting budget at the same time
|
||||||
scheduler.add_job(reset_budget, "interval", seconds=interval, args=[prisma_client])
|
scheduler.add_job(reset_budget, "interval", seconds=interval, args=[prisma_client])
|
||||||
scheduler.start()
|
scheduler.start()
|
||||||
|
@ -3042,6 +3083,9 @@ async def spend_user_fn(
|
||||||
"/spend/logs",
|
"/spend/logs",
|
||||||
tags=["budget & spend Tracking"],
|
tags=["budget & spend Tracking"],
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
responses={
|
||||||
|
200: {"model": List[LiteLLM_SpendLogs]},
|
||||||
|
},
|
||||||
)
|
)
|
||||||
async def view_spend_logs(
|
async def view_spend_logs(
|
||||||
api_key: Optional[str] = fastapi.Query(
|
api_key: Optional[str] = fastapi.Query(
|
||||||
|
@ -3128,7 +3172,7 @@ async def view_spend_logs(
|
||||||
table_name="spend", query_type="find_all"
|
table_name="spend", query_type="find_all"
|
||||||
)
|
)
|
||||||
|
|
||||||
return spend_logs
|
return spend_log
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -399,13 +399,7 @@ class PrismaClient:
|
||||||
# Now you can import the Prisma Client
|
# Now you can import the Prisma Client
|
||||||
from prisma import Prisma # type: ignore
|
from prisma import Prisma # type: ignore
|
||||||
|
|
||||||
self.db = Prisma(
|
self.db = Prisma() # Client to connect to Prisma db
|
||||||
http={
|
|
||||||
"limits": httpx.Limits(
|
|
||||||
max_connections=1000, max_keepalive_connections=100
|
|
||||||
)
|
|
||||||
}
|
|
||||||
) # Client to connect to Prisma db
|
|
||||||
|
|
||||||
def hash_token(self, token: str):
|
def hash_token(self, token: str):
|
||||||
# Hash the string using SHA-256
|
# Hash the string using SHA-256
|
||||||
|
|
|
@ -431,9 +431,9 @@ async def test_key_info_spend_values_image_generation():
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_key_with_budgets():
|
async def test_key_with_budgets():
|
||||||
"""
|
"""
|
||||||
- Create key with budget and 5s duration
|
- Create key with budget and 5min duration
|
||||||
- Get 'reset_at' value
|
- Get 'reset_at' value
|
||||||
- wait 5s
|
- wait 10min (budget reset runs every 10mins.)
|
||||||
- Check if value updated
|
- Check if value updated
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.utils import hash_token
|
from litellm.proxy.utils import hash_token
|
||||||
|
@ -449,8 +449,8 @@ async def test_key_with_budgets():
|
||||||
reset_at_init_value = key_info["info"]["budget_reset_at"]
|
reset_at_init_value = key_info["info"]["budget_reset_at"]
|
||||||
reset_at_new_value = None
|
reset_at_new_value = None
|
||||||
i = 0
|
i = 0
|
||||||
|
await asyncio.sleep(610)
|
||||||
while i < 3:
|
while i < 3:
|
||||||
await asyncio.sleep(30)
|
|
||||||
key_info = await get_key_info(session=session, get_key=key, call_key=key)
|
key_info = await get_key_info(session=session, get_key=key, call_key=key)
|
||||||
reset_at_new_value = key_info["info"]["budget_reset_at"]
|
reset_at_new_value = key_info["info"]["budget_reset_at"]
|
||||||
try:
|
try:
|
||||||
|
@ -458,6 +458,7 @@ async def test_key_with_budgets():
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
i + 1
|
i + 1
|
||||||
|
await asyncio.sleep(5)
|
||||||
assert reset_at_init_value != reset_at_new_value
|
assert reset_at_init_value != reset_at_new_value
|
||||||
|
|
||||||
|
|
||||||
|
@ -481,7 +482,7 @@ async def test_key_crossing_budget():
|
||||||
|
|
||||||
response = await chat_completion(session=session, key=key)
|
response = await chat_completion(session=session, key=key)
|
||||||
print("response 1: ", response)
|
print("response 1: ", response)
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(10)
|
||||||
try:
|
try:
|
||||||
response = await chat_completion(session=session, key=key)
|
response = await chat_completion(session=session, key=key)
|
||||||
pytest.fail("Should have failed - Key crossed it's budget")
|
pytest.fail("Should have failed - Key crossed it's budget")
|
||||||
|
@ -490,7 +491,7 @@ async def test_key_crossing_budget():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_key_zinfo_spend_values_sagemaker():
|
async def test_key_info_spend_values_sagemaker():
|
||||||
"""
|
"""
|
||||||
Tests the sync streaming loop to ensure spend is correctly calculated.
|
Tests the sync streaming loop to ensure spend is correctly calculated.
|
||||||
- create key
|
- create key
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# What this tests?
|
# What this tests?
|
||||||
## Tests /spend endpoints.
|
## Tests /spend endpoints.
|
||||||
|
|
||||||
import pytest
|
import pytest, time, uuid
|
||||||
import asyncio
|
import asyncio
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
|
@ -26,17 +26,17 @@ async def generate_key(session, models=[]):
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
|
||||||
|
|
||||||
async def chat_completion(session, key):
|
async def chat_completion(session, key, model="gpt-3.5-turbo"):
|
||||||
url = "http://0.0.0.0:4000/chat/completions"
|
url = "http://0.0.0.0:4000/chat/completions"
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {key}",
|
"Authorization": f"Bearer {key}",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
data = {
|
data = {
|
||||||
"model": "gpt-3.5-turbo",
|
"model": model,
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": "You are a helpful assistant."},
|
{"role": "system", "content": "You are a helpful assistant."},
|
||||||
{"role": "user", "content": "Hello!"},
|
{"role": "user", "content": f"Hello! {uuid.uuid4()}"},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,7 +53,36 @@ async def chat_completion(session, key):
|
||||||
return await response.json()
|
return await response.json()
|
||||||
|
|
||||||
|
|
||||||
async def get_spend_logs(session, request_id):
|
async def chat_completion_high_traffic(session, key, model="gpt-3.5-turbo"):
|
||||||
|
url = "http://0.0.0.0:4000/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
"model": model,
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": "You are a helpful assistant."},
|
||||||
|
{"role": "user", "content": f"Hello! {uuid.uuid4()}"},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
|
status = response.status
|
||||||
|
response_text = await response.text()
|
||||||
|
|
||||||
|
if status != 200:
|
||||||
|
raise Exception(f"Request did not return a 200 status code: {status}")
|
||||||
|
|
||||||
|
return await response.json()
|
||||||
|
except Exception as e:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_spend_logs(session, request_id=None, api_key=None):
|
||||||
|
if api_key is not None:
|
||||||
|
url = f"http://0.0.0.0:4000/spend/logs?api_key={api_key}"
|
||||||
|
else:
|
||||||
url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
|
url = f"http://0.0.0.0:4000/spend/logs?request_id={request_id}"
|
||||||
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||||
|
|
||||||
|
@ -82,3 +111,57 @@ async def test_spend_logs():
|
||||||
response = await chat_completion(session=session, key=key)
|
response = await chat_completion(session=session, key=key)
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
await get_spend_logs(session=session, request_id=response["id"])
|
await get_spend_logs(session=session, request_id=response["id"])
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="High traffic load test, meant to be run locally")
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_spend_logs_high_traffic():
|
||||||
|
"""
|
||||||
|
- Create key
|
||||||
|
- Make 30 concurrent calls
|
||||||
|
- Get all logs for that key
|
||||||
|
- Wait 10s
|
||||||
|
- Assert it's 30
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def retry_request(func, *args, _max_attempts=5, **kwargs):
|
||||||
|
for attempt in range(_max_attempts):
|
||||||
|
try:
|
||||||
|
return await func(*args, **kwargs)
|
||||||
|
except (
|
||||||
|
aiohttp.client_exceptions.ClientOSError,
|
||||||
|
aiohttp.client_exceptions.ServerDisconnectedError,
|
||||||
|
) as e:
|
||||||
|
if attempt + 1 == _max_attempts:
|
||||||
|
raise # re-raise the last ClientOSError if all attempts failed
|
||||||
|
print(f"Attempt {attempt+1} failed, retrying...")
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession(
|
||||||
|
timeout=aiohttp.ClientTimeout(total=600)
|
||||||
|
) as session:
|
||||||
|
start = time.time()
|
||||||
|
key_gen = await generate_key(session=session)
|
||||||
|
key = key_gen["key"]
|
||||||
|
n = 1000
|
||||||
|
tasks = [
|
||||||
|
retry_request(
|
||||||
|
chat_completion_high_traffic,
|
||||||
|
session=session,
|
||||||
|
key=key,
|
||||||
|
model="azure-gpt-3.5",
|
||||||
|
)
|
||||||
|
for _ in range(n)
|
||||||
|
]
|
||||||
|
chat_completions = await asyncio.gather(*tasks)
|
||||||
|
successful_completions = [c for c in chat_completions if c is not None]
|
||||||
|
print(f"Num successful completions: {len(successful_completions)}")
|
||||||
|
await asyncio.sleep(10)
|
||||||
|
try:
|
||||||
|
response = await retry_request(get_spend_logs, session=session, api_key=key)
|
||||||
|
print(f"response: {response}")
|
||||||
|
print(f"len responses: {len(response)}")
|
||||||
|
assert len(response) == n
|
||||||
|
print(n, time.time() - start, len(response))
|
||||||
|
except:
|
||||||
|
print(n, time.time() - start, 0)
|
||||||
|
raise Exception("it worked!")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue