diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml
index ed0c34fbd..4f978b648 100644
--- a/.github/workflows/load_test.yml
+++ b/.github/workflows/load_test.yml
@@ -16,7 +16,7 @@ jobs:
           URL: "https://litellm-api.up.railway.app/"
           USERS: "100"
           RATE: "10"
-          RUNTIME: "60s"
+          RUNTIME: "300s"
       - name: Upload CSV as Asset to Latest Release
         uses: xresloader/upload-to-github-release@v1
         env:
diff --git a/.github/workflows/locustfile.py b/.github/workflows/locustfile.py
index 5efdca84d..99022dc34 100644
--- a/.github/workflows/locustfile.py
+++ b/.github/workflows/locustfile.py
@@ -1,14 +1,16 @@
-from locust import HttpUser, task, between
+from locust import HttpUser, task, between, events
+import json
+import time


 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-1234",
+            "Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
             # Include any additional headers you may need for authentication, etc.
         }

@@ -26,3 +28,31 @@ class MyUser(HttpUser):
         response = self.client.post("chat/completions", json=payload, headers=headers)

         # Print or log the response if needed
+
+    @task(10)
+    def health_readiness(self):
+        start_time = time.time()
+        response = self.client.get("health/readiness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/readiness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
+
+    @task(10)
+    def health_liveliness(self):
+        start_time = time.time()
+        response = self.client.get("health/liveliness")
+        response_time = time.time() - start_time
+        if response_time > 1:
+            events.request_failure.fire(
+                request_type="GET",
+                name="health/liveliness",
+                response_time=response_time,
+                exception=None,
+                response=response,
+            )
diff --git a/litellm/proxy/proxy_load_test/locustfile.py b/litellm/proxy/proxy_load_test/locustfile.py
index 220bd3553..99022dc34 100644
--- a/litellm/proxy/proxy_load_test/locustfile.py
+++ b/litellm/proxy/proxy_load_test/locustfile.py
@@ -6,7 +6,7 @@ import time
 class MyUser(HttpUser):
     wait_time = between(1, 5)

-    @task
+    @task(3)
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
@@ -56,37 +56,3 @@ class MyUser(HttpUser):
                 exception=None,
                 response=response,
             )
-
-    # @task
-    # def key_generate(self):
-    #     headers = {
-    #         "Authorization": "Bearer sk-1234",
-    #         "Content-Type": "application/json",
-    #     }
-
-    #     payload = {
-    #         "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
-    #         "duration": "20m",
-    #         "metadata": {"user": "ishaan@berri.ai"},
-    #         "team_id": "core-infra",
-    #         "max_budget": 10,
-    #         "soft_budget": 5,
-    #     }
-
-    #     response = self.client.post("key/generate", json=payload, headers=headers)
-
-    #     if response.status_code == 200:
-    #         key_response = response.json()
-    #         models = key_response.get("models", [])
-    #         if models:
-    #             # Use the first model from the key generation response to make a chat completions request
-    #             model_to_use = models[0]
-    #             chat_payload = {
-    #                 "model": model_to_use,
-    #                 "messages": [
-    #                     {"role": "system", "content": "You are a chat bot."},
-    #                     {"role": "user", "content": "Hello, how are you?"},
-    #                 ],
-    #             }
-    #             chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
-    #             # Print or log the chat response if needed
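
A note on the Locust API used in the new health-check tasks: events.request_failure is a Locust 1.x hook. Locust 2.0 removed the request_success/request_failure pair in favor of a single events.request event, and the documented way to mark an individual request as failed is the catch_response flag. Locust also records response_time in milliseconds, while the tasks above pass seconds. Assuming Locust 2.x, a minimal sketch of the same slow-response check could look like the following (the class name HealthCheckUser is illustrative; the endpoint path and the 1-second threshold come from the patch):

from locust import HttpUser, task, between


class HealthCheckUser(HttpUser):
    wait_time = between(1, 5)

    @task(10)
    def health_readiness(self):
        # catch_response=True hands pass/fail bookkeeping to this block
        with self.client.get("health/readiness", catch_response=True) as response:
            # response.elapsed is the timedelta from the underlying requests library
            if response.elapsed.total_seconds() > 1:
                # Recorded as a failure in Locust's stats
                response.failure("health/readiness took longer than 1s")
            else:
                response.success()

The same pattern would apply to health/liveliness. If the locustfile is meant to stay on Locust 1.x instead, note that the request_failure hook there expects a response_length argument and no response keyword, so the fire() calls in the patch may need adjusting in that case as well.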