diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml
index ed0c34fbd..4f978b648 100644
--- a/.github/workflows/load_test.yml
+++ b/.github/workflows/load_test.yml
@@ -16,7 +16,7 @@ jobs:
           URL: "https://litellm-api.up.railway.app/"
           USERS: "100"
           RATE: "10"
-          RUNTIME: "60s"
+          RUNTIME: "300s"
       - name: Upload CSV as Asset to Latest Release
         uses: xresloader/upload-to-github-release@v1
         env:
diff --git a/.github/workflows/locustfile.py b/.github/workflows/locustfile.py
index 5efdca84d..99022dc34 100644
--- a/.github/workflows/locustfile.py
+++ b/.github/workflows/locustfile.py
@@ -1,14 +1,16 @@
-from locust import HttpUser, task, between
+from locust import HttpUser, task, between, events
+import json
+import time


 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-1234",
+            "Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
             # Include any additional headers you may need for authentication, etc.
         }

@@ -26,3 +28,31 @@ class MyUser(HttpUser):
         response = self.client.post("chat/completions", json=payload, headers=headers)

         # Print or log the response if needed
+
+    @task(10)
+    def health_readiness(self):
+        start_time = time.time()
+        response = self.client.get("health/readiness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/readiness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
+
+    @task(10)
+    def health_liveliness(self):
+        start_time = time.time()
+        response = self.client.get("health/liveliness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/liveliness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
diff --git a/litellm/proxy/proxy_load_test/locustfile.py b/litellm/proxy/proxy_load_test/locustfile.py
index 220bd3553..99022dc34 100644
--- a/litellm/proxy/proxy_load_test/locustfile.py
+++ b/litellm/proxy/proxy_load_test/locustfile.py
@@ -6,7 +6,7 @@ import time
 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
@@ -56,37 +56,3 @@ class MyUser(HttpUser):
                 exception=None,
                 response=response,
             )
-
-    # @task
-    # def key_generate(self):
-    #     headers = {
-    #         "Authorization": "Bearer sk-1234",
-    #         "Content-Type": "application/json",
-    #     }
-
-    #     payload = {
-    #         "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
-    #         "duration": "20m",
-    #         "metadata": {"user": "ishaan@berri.ai"},
-    #         "team_id": "core-infra",
-    #         "max_budget": 10,
-    #         "soft_budget": 5,
-    #     }
-
-    #     response = self.client.post("key/generate", json=payload, headers=headers)
-
-    #     if response.status_code == 200:
-    #         key_response = response.json()
-    #         models = key_response.get("models", [])
-    #         if models:
-    #             # Use the first model from the key generation response to make a chat completions request
-    #             model_to_use = models[0]
-    #             chat_payload = {
-    #                 "model": model_to_use,
-    #                 "messages": [
-    #                     {"role": "system", "content": "You are a chat bot."},
-    #                     {"role": "user", "content": "Hello, how are you?"},
-    #                 ],
-    #             }
-    #             chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
-    #             # Print or log the chat response if needed
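
A note on the Locust API used in the new health-check tasks: events.request_failure is a Locust 1.x hook. Locust 2.0 removed the request_success/request_failure pair in favor of a single events.request event, and the documented way to mark an individual request as failed is the catch_response flag. Locust also records response_time in milliseconds, while the tasks above pass seconds. Assuming Locust 2.x, a minimal sketch of the same slow-response check could look like the following (the class name HealthCheckUser is illustrative; the endpoint path and the 1-second threshold come from the patch):

from locust import HttpUser, task, between


class HealthCheckUser(HttpUser):
    wait_time = between(1, 5)

    @task(10)
    def health_readiness(self):
        # catch_response=True hands pass/fail bookkeeping to this block
        with self.client.get("health/readiness", catch_response=True) as response:
            # response.elapsed is the timedelta from the underlying requests library
            if response.elapsed.total_seconds() > 1:
                # Recorded as a failure in Locust's stats
                response.failure("health/readiness took longer than 1s")
            else:
                response.success()

The same pattern would apply to health/liveliness. If the locustfile is meant to stay on Locust 1.x instead, note that the request_failure hook there expects a response_length argument and no response keyword, so the fire() calls in the patch may need adjusting in that case as well.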