diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 76c9ed04c..049cf1d3b 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -5,9 +5,13 @@ model_list:
       api_base: os.environ/AZURE_API_BASE
       api_key: os.environ/AZURE_API_KEY
       api_version: "2023-07-01-preview"
-litellm_settings:
-  set_verbose: True
-  success_callback: ["langfuse"]
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+general_settings:
+  master_key: sk-1234
 router_settings:
   set_verbose: True
   debug_level: "DEBUG"
\ No newline at end of file
diff --git a/litellm/proxy/proxy_load_test/locustfile.py b/litellm/proxy/proxy_load_test/locustfile.py
index f57ae9208..220bd3553 100644
--- a/litellm/proxy/proxy_load_test/locustfile.py
+++ b/litellm/proxy/proxy_load_test/locustfile.py
@@ -1,4 +1,6 @@
-from locust import HttpUser, task, between
+from locust import HttpUser, task, between, events
+import json
+import time
 
 
 class MyUser(HttpUser):
@@ -8,13 +10,13 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer sk-1234",
+            "Authorization": "Bearer sk-1234",
             # Include any additional headers you may need for authentication, etc.
         }
 
         # Customize the payload with "model" and "messages" keys
         payload = {
-            "model": "gpt-3.5-turbo",
+            "model": "fake-openai-endpoint",
             "messages": [
                 {"role": "system", "content": "You are a chat bot."},
                 {"role": "user", "content": "Hello, how are you?"},
@@ -26,3 +28,63 @@ class MyUser(HttpUser):
         response = self.client.post("chat/completions", json=payload, headers=headers)
 
         # Print or log the response if needed
+
+    def _timed_health_check(self, endpoint, max_seconds=1):
+        """GET ``endpoint`` and mark the request failed if it is too slow.
+
+        ``catch_response=True`` puts the slow-response verdict on the
+        request's own stats entry, so each probe is counted exactly once
+        and no separately fired (legacy) failure event is needed.
+        Responses that are not flagged here keep Locust's default
+        status-code based success/failure handling.
+        """
+        start_time = time.time()
+        with self.client.get(endpoint, catch_response=True) as response:
+            response_time = time.time() - start_time
+            if response_time > max_seconds:
+                response.failure(
+                    f"{endpoint} took {response_time:.2f}s (limit {max_seconds}s)"
+                )
+
+    @task(10)
+    def health_readiness(self):
+        """Probe ``health/readiness``; responses slower than 1s count as failures."""
+        self._timed_health_check("health/readiness")
+
+    @task(10)
+    def health_liveliness(self):
+        """Probe ``health/liveliness``; responses slower than 1s count as failures."""
+        self._timed_health_check("health/liveliness")
+
+    # @task
+    # def key_generate(self):
+    #     headers = {
+    #         "Authorization": "Bearer sk-1234",
+    #         "Content-Type": "application/json",
+    #     }
+
+    #     payload = {
+    #         "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
+    #         "duration": "20m",
+    #         "metadata": {"user": "ishaan@berri.ai"},
+    #         "team_id": "core-infra",
+    #         "max_budget": 10,
+    #         "soft_budget": 5,
+    #     }
+
+    #     response = self.client.post("key/generate", json=payload, headers=headers)
+
+    #     if response.status_code == 200:
+    #         key_response = response.json()
+    #         models = key_response.get("models", [])
+    #         if models:
+    #             # Use the first model from the key generation response to make a chat completions request
+    #             model_to_use = models[0]
+    #             chat_payload = {
+    #                 "model": model_to_use,
+    #                 "messages": [
+    #                     {"role": "system", "content": "You are a chat bot."},
+    #                     {"role": "user", "content": "Hello, how are you?"},
+    #                 ],
+    #             }
+    #             chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
+    #             # Print or log the chat response if needed