(fix) load test run over 300s

This commit is contained in:
ishaan-jaff 2024-03-14 19:44:31 -07:00
parent 1b63748831
commit 04ef2f2023
3 changed files with 35 additions and 39 deletions

View file

@ -16,7 +16,7 @@ jobs:
URL: "https://litellm-api.up.railway.app/" URL: "https://litellm-api.up.railway.app/"
USERS: "100" USERS: "100"
RATE: "10" RATE: "10"
RUNTIME: "60s" RUNTIME: "300s"
- name: Upload CSV as Asset to Latest Release - name: Upload CSV as Asset to Latest Release
uses: xresloader/upload-to-github-release@v1 uses: xresloader/upload-to-github-release@v1
env: env:

View file

@ -1,14 +1,16 @@
from locust import HttpUser, task, between from locust import HttpUser, task, between, events
import json
import time
class MyUser(HttpUser): class MyUser(HttpUser):
wait_time = between(1, 5) wait_time = between(1, 5)
@task @task(3)
def chat_completion(self): def chat_completion(self):
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer sk-1234", "Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
# Include any additional headers you may need for authentication, etc. # Include any additional headers you may need for authentication, etc.
} }
@ -26,3 +28,31 @@ class MyUser(HttpUser):
response = self.client.post("chat/completions", json=payload, headers=headers) response = self.client.post("chat/completions", json=payload, headers=headers)
# Print or log the response if needed # Print or log the response if needed
@task(10)
def health_readiness(self):
    """Probe ``health/readiness`` and fail the request if it takes over 1 second.

    The request is issued with ``catch_response=True`` so that Locust lets
    this task decide the outcome. That keeps a slow probe as a single
    failed entry in the statistics, instead of the request being recorded
    once by the client and then a second time by a manually fired event.
    If the latency is acceptable, nothing is marked here and Locust applies
    its normal status-code based success/failure handling on exit.
    """
    start_time = time.time()
    with self.client.get("health/readiness", catch_response=True) as response:
        # Wall-clock seconds for the whole client call, as in the original check.
        response_time = time.time() - start_time
        if response_time > 1:
            response.failure(
                f"health/readiness took {response_time:.2f}s (limit 1s)"
            )
@task(10)
def health_liveliness(self):
    """Probe ``health/liveliness`` and fail the request if it takes over 1 second.

    The request is issued with ``catch_response=True`` so that Locust lets
    this task decide the outcome. That keeps a slow probe as a single
    failed entry in the statistics, instead of the request being recorded
    once by the client and then a second time by a manually fired event.
    If the latency is acceptable, nothing is marked here and Locust applies
    its normal status-code based success/failure handling on exit.
    """
    start_time = time.time()
    with self.client.get("health/liveliness", catch_response=True) as response:
        # Wall-clock seconds for the whole client call, as in the original check.
        response_time = time.time() - start_time
        if response_time > 1:
            response.failure(
                f"health/liveliness took {response_time:.2f}s (limit 1s)"
            )

View file

@ -6,7 +6,7 @@ import time
class MyUser(HttpUser): class MyUser(HttpUser):
wait_time = between(1, 5) wait_time = between(1, 5)
@task @task(3)
def chat_completion(self): def chat_completion(self):
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
@ -56,37 +56,3 @@ class MyUser(HttpUser):
exception=None, exception=None,
response=response, response=response,
) )
# @task
# def key_generate(self):
# headers = {
# "Authorization": "Bearer sk-1234",
# "Content-Type": "application/json",
# }
# payload = {
# "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
# "duration": "20m",
# "metadata": {"user": "ishaan@berri.ai"},
# "team_id": "core-infra",
# "max_budget": 10,
# "soft_budget": 5,
# }
# response = self.client.post("key/generate", json=payload, headers=headers)
# if response.status_code == 200:
# key_response = response.json()
# models = key_response.get("models", [])
# if models:
# # Use the first model from the key generation response to make a chat completions request
# model_to_use = models[0]
# chat_payload = {
# "model": model_to_use,
# "messages": [
# {"role": "system", "content": "You are a chat bot."},
# {"role": "user", "content": "Hello, how are you?"},
# ],
# }
# chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
# # Print or log the chat response if needed