(fix) load test run over 300s

parent 1b63748831
commit 04ef2f2023

3 changed files with 35 additions and 39 deletions
.github/workflows/load_test.yml (vendored, 2 changes)
@@ -16,7 +16,7 @@ jobs:
           URL: "https://litellm-api.up.railway.app/"
           USERS: "100"
           RATE: "10"
-          RUNTIME: "60s"
+          RUNTIME: "300s"
       - name: Upload CSV as Asset to Latest Release
         uses: xresloader/upload-to-github-release@v1
         env:
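The USERS, RATE, and RUNTIME values above are consumed by the load-test step. Assuming the action forwards them to Locust's standard -u, -r, and -t flags (an assumption; the step that actually invokes Locust sits outside this hunk), the commit's effect is the same as running the test locally for 300 seconds instead of 60. A sketch of that local equivalent:

# Hypothetical local equivalent of the workflow's load-test step. The
# env-to-flag mapping (USERS -> -u, RATE -> -r, RUNTIME -> -t) is an
# assumption; the Locust CLI flags themselves are standard.
import subprocess

subprocess.run(
    [
        "locust",
        "-f", ".github/workflows/locustfile.py",
        "--host", "https://litellm-api.up.railway.app/",
        "--headless",          # no web UI; run to completion and exit
        "-u", "100",           # USERS: peak number of simulated users
        "-r", "10",            # RATE: users spawned per second
        "-t", "300s",          # RUNTIME: raised from 60s by this commit
        "--csv", "load_test",  # write the CSV stats the next step uploads
    ],
    check=True,
)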
.github/workflows/locustfile.py (vendored, 36 changes)
@@ -1,14 +1,16 @@
-from locust import HttpUser, task, between
+from locust import HttpUser, task, between, events
+import json
+import time


 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-1234",
+            "Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
             # Include any additional headers you may need for authentication, etc.
         }
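The hunk ends before the request body, but judging from the chat payload visible in the block this commit deletes further down, chat_completion presumably posts an OpenAI-compatible body of this shape (presumed, not shown in this hunk):

# Presumed shape of the chat_completion payload, modeled on the chat
# payload in the commented-out block this commit deletes below.
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {"role": "system", "content": "You are a chat bot."},
        {"role": "user", "content": "Hello, how are you?"},
    ],
}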
@@ -26,3 +28,31 @@ class MyUser(HttpUser):
         response = self.client.post("chat/completions", json=payload, headers=headers)

         # Print or log the response if needed
+
+    @task(10)
+    def health_readiness(self):
+        start_time = time.time()
+        response = self.client.get("health/readiness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/readiness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
+
+    @task(10)
+    def health_liveliness(self):
+        start_time = time.time()
+        response = self.client.get("health/liveliness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/liveliness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
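With these weights, each simulated user picks chat_completion about 3/23 of the time and each health probe about 10/23 of the time. The probes report any response slower than one second as a failed request through events.request_failure, which belongs to Locust's pre-2.0 event API (note also that Locust records response_time in milliseconds, so a raw time.time() delta in seconds may under-report). On Locust 2.x, the idiomatic equivalent of the same 1-second SLA check is catch_response, sketched here under that assumption:

# Minimal Locust 2.x sketch of the same 1-second SLA check, using
# catch_response instead of the legacy events.request_failure hook.
# Endpoints, weights, and threshold match the diff; the rest is assumed.
from locust import HttpUser, task, between


class HealthCheckUser(HttpUser):
    wait_time = between(1, 5)

    @task(10)
    def health_readiness(self):
        # catch_response defers the pass/fail decision to our code
        with self.client.get("health/readiness", catch_response=True) as response:
            if response.elapsed.total_seconds() > 1:
                response.failure("health/readiness exceeded 1s SLA")

    @task(10)
    def health_liveliness(self):
        with self.client.get("health/liveliness", catch_response=True) as response:
            if response.elapsed.total_seconds() > 1:
                response.failure("health/liveliness exceeded 1s SLA")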
(third changed file; path not preserved in this capture)

@@ -6,7 +6,7 @@ import time
 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
@@ -56,37 +56,3 @@ class MyUser(HttpUser):
                 exception=None,
                 response=response,
             )
-
-    # @task
-    # def key_generate(self):
-    #     headers = {
-    #         "Authorization": "Bearer sk-1234",
-    #         "Content-Type": "application/json",
-    #     }
-
-    #     payload = {
-    #         "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
-    #         "duration": "20m",
-    #         "metadata": {"user": "ishaan@berri.ai"},
-    #         "team_id": "core-infra",
-    #         "max_budget": 10,
-    #         "soft_budget": 5,
-    #     }
-
-    #     response = self.client.post("key/generate", json=payload, headers=headers)
-
-    #     if response.status_code == 200:
-    #         key_response = response.json()
-    #         models = key_response.get("models", [])
-    #         if models:
-    #             # Use the first model from the key generation response to make a chat completions request
-    #             model_to_use = models[0]
-    #             chat_payload = {
-    #                 "model": model_to_use,
-    #                 "messages": [
-    #                     {"role": "system", "content": "You are a chat bot."},
-    #                     {"role": "user", "content": "Hello, how are you?"},
-    #                 ],
-    #             }
-    #             chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
-    #             # Print or log the chat response if needed
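The deleted block was a dormant, fully commented-out key-generation scenario. For reference, a working version of that flow (payload fields and response handling taken from the deleted comments; the key/generate route and the master key are assumptions about the target proxy) would look roughly like:

# Rough reconstruction of the deleted key_generate task, uncommented.
# Payload and flow come from the deleted comments; treat the route and
# the "sk-1234" master key as assumptions about the proxy under test.
from locust import HttpUser, task, between


class KeyGenUser(HttpUser):
    wait_time = between(1, 5)

    @task
    def key_generate(self):
        headers = {
            "Authorization": "Bearer sk-1234",
            "Content-Type": "application/json",
        }
        payload = {
            "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
            "duration": "20m",
            "metadata": {"user": "ishaan@berri.ai"},
            "team_id": "core-infra",
            "max_budget": 10,
            "soft_budget": 5,
        }
        response = self.client.post("key/generate", json=payload, headers=headers)
        if response.status_code == 200:
            models = response.json().get("models", [])
            if models:
                # Use the first model from the generated key for a completion
                chat_payload = {
                    "model": models[0],
                    "messages": [
                        {"role": "system", "content": "You are a chat bot."},
                        {"role": "user", "content": "Hello, how are you?"},
                    ],
                }
                self.client.post("chat/completions", json=chat_payload, headers=headers)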