(docs) load testing proxy

2024-03-14 15:19:39 -07:00 · 2024-03-14 15:19:39 -07:00 · 517e453adf
commit 517e453adf
parent 8f4a99e35e
2 changed files with 74 additions and 6 deletions
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -5,9 +5,13 @@ model_list:
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
-litellm_settings:
-  set_verbose: True
-  success_callback: ["langfuse"]
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+general_settings:
+  master_key: sk-1234
 router_settings:
  set_verbose: True
  debug_level: "DEBUG"
--- a/litellm/proxy/proxy_load_test/locustfile.py
+++ b/litellm/proxy/proxy_load_test/locustfile.py
@ -1,4 +1,6 @@
-from locust import HttpUser, task, between
+from locust import HttpUser, task, between, events
+import json
+import time


 class MyUser(HttpUser):
@ -8,13 +10,13 @@ class MyUser(HttpUser):
    def chat_completion(self):
        headers = {
            "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-1234",
+            "Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
            # Include any additional headers you may need for authentication, etc.
        }

        # Customize the payload with "model" and "messages" keys
        payload = {
-            "model": "gpt-3.5-turbo",
+            "model": "fake-openai-endpoint",
            "messages": [
                {"role": "system", "content": "You are a chat bot."},
                {"role": "user", "content": "Hello, how are you?"},
@ -26,3 +28,65 @@ class MyUser(HttpUser):
        response = self.client.post("chat/completions", json=payload, headers=headers)

        # Print or log the response if needed
+
+    @task(10)
+    def health_readiness(self):
+        """GET ``health/readiness``; mark the request failed when it takes over 1s."""
+        # catch_response=True lets the slow call be failed on its own stats entry.
+        # The legacy events.request_failure hook logged a second entry, was given
+        # seconds where Locust expects milliseconds, and is gone in newer Locust.
+        started = time.perf_counter()
+        with self.client.get("health/readiness", catch_response=True) as response:
+            elapsed_s = time.perf_counter() - started
+            if elapsed_s > 1:
+                response.failure(
+                    f"health/readiness took {elapsed_s:.2f}s (limit 1s)"
+                )
+
+    @task(10)
+    def health_liveliness(self):
+        """GET ``health/liveliness``; mark the request failed when it takes over 1s."""
+        # catch_response=True lets the slow call be failed on its own stats entry.
+        # The legacy events.request_failure hook logged a second entry, was given
+        # seconds where Locust expects milliseconds, and is gone in newer Locust.
+        started = time.perf_counter()
+        with self.client.get("health/liveliness", catch_response=True) as response:
+            elapsed_s = time.perf_counter() - started
+            if elapsed_s > 1:
+                response.failure(
+                    f"health/liveliness took {elapsed_s:.2f}s (limit 1s)"
+                )
+
+    # @task
+    # def key_generate(self):
+    #     headers = {
+    #         "Authorization": "Bearer sk-1234",
+    #         "Content-Type": "application/json",
+    #     }
+
+    #     payload = {
+    #         "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
+    #         "duration": "20m",
+    #         "metadata": {"user": "ishaan@berri.ai"},
+    #         "team_id": "core-infra",
+    #         "max_budget": 10,
+    #         "soft_budget": 5,
+    #     }
+
+    #     response = self.client.post("key/generate", json=payload, headers=headers)
+
+    #     if response.status_code == 200:
+    #         key_response = response.json()
+    #         models = key_response.get("models", [])
+    #         if models:
+    #             # Use the first model from the key generation response to make a chat completions request
+    #             model_to_use = models[0]
+    #             chat_payload = {
+    #                 "model": model_to_use,
+    #                 "messages": [
+    #                     {"role": "system", "content": "You are a chat bot."},
+    #                     {"role": "user", "content": "Hello, how are you?"},
+    #                 ],
+    #             }
+    #             chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
+    #             # Print or log the chat response if needed