forked from phoenix/litellm-mirror
(docs) load testing proxy
This commit is contained in:
parent
8f4a99e35e
commit
517e453adf
2 changed files with 74 additions and 6 deletions
|
@ -5,9 +5,13 @@ model_list:
|
|||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-07-01-preview"
|
||||
litellm_settings:
|
||||
set_verbose: True
|
||||
success_callback: ["langfuse"]
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
router_settings:
|
||||
set_verbose: True
|
||||
debug_level: "DEBUG"
|
|
@ -1,4 +1,6 @@
|
|||
from locust import HttpUser, task, between
|
||||
from locust import HttpUser, task, between, events
|
||||
import json
|
||||
import time
|
||||
|
||||
|
||||
class MyUser(HttpUser):
|
||||
|
@ -8,13 +10,13 @@ class MyUser(HttpUser):
|
|||
def chat_completion(self):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer sk-1234",
|
||||
"Authorization": f"Bearer sk-mh3YNUDs1d_f6fMXfvEqBA",
|
||||
# Include any additional headers you may need for authentication, etc.
|
||||
}
|
||||
|
||||
# Customize the payload with "model" and "messages" keys
|
||||
payload = {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"model": "fake-openai-endpoint",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a chat bot."},
|
||||
{"role": "user", "content": "Hello, how are you?"},
|
||||
|
@ -26,3 +28,65 @@ class MyUser(HttpUser):
|
|||
response = self.client.post("chat/completions", json=payload, headers=headers)
|
||||
|
||||
# Print or log the response if needed
|
||||
|
||||
@task(10)
def health_readiness(self):
    """Probe the proxy's /health/readiness endpoint.

    Fires a Locust failure event whenever the endpoint takes longer
    than 1 second to respond, so slow readiness checks show up in the
    load-test stats even though the HTTP call itself succeeded.
    """
    start_time = time.time()
    response = self.client.get("health/readiness")
    elapsed_seconds = time.time() - start_time
    if elapsed_seconds > 1:
        # NOTE(review): events.request_failure is the legacy (pre-2.0)
        # Locust event API; on Locust >= 2.0 this should be
        # self.environment.events.request.fire(..., exception=...).
        # Confirm the pinned locust version before changing.
        events.request_failure.fire(
            request_type="GET",
            name="health/readiness",
            # Locust reports response_time in milliseconds; the raw
            # time.time() delta is in seconds, so convert here.
            response_time=elapsed_seconds * 1000,
            exception=None,
            response=response,
        )
|
||||
|
||||
@task(10)
def health_liveliness(self):
    """Probe the proxy's /health/liveliness endpoint.

    Mirrors health_readiness: any response slower than 1 second is
    reported to Locust as a failed request so latency regressions are
    visible in the aggregated stats.
    """
    start_time = time.time()
    response = self.client.get("health/liveliness")
    elapsed_seconds = time.time() - start_time
    if elapsed_seconds > 1:
        # NOTE(review): events.request_failure is the legacy (pre-2.0)
        # Locust event API; on Locust >= 2.0 this should be
        # self.environment.events.request.fire(..., exception=...).
        # Confirm the pinned locust version before changing.
        events.request_failure.fire(
            request_type="GET",
            name="health/liveliness",
            # Locust reports response_time in milliseconds; the raw
            # time.time() delta is in seconds, so convert here.
            response_time=elapsed_seconds * 1000,
            exception=None,
            response=response,
        )
|
||||
|
||||
# @task
|
||||
# def key_generate(self):
|
||||
# headers = {
|
||||
# "Authorization": "Bearer sk-1234",
|
||||
# "Content-Type": "application/json",
|
||||
# }
|
||||
|
||||
# payload = {
|
||||
# "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
|
||||
# "duration": "20m",
|
||||
# "metadata": {"user": "ishaan@berri.ai"},
|
||||
# "team_id": "core-infra",
|
||||
# "max_budget": 10,
|
||||
# "soft_budget": 5,
|
||||
# }
|
||||
|
||||
# response = self.client.post("key/generate", json=payload, headers=headers)
|
||||
|
||||
# if response.status_code == 200:
|
||||
# key_response = response.json()
|
||||
# models = key_response.get("models", [])
|
||||
# if models:
|
||||
# # Use the first model from the key generation response to make a chat completions request
|
||||
# model_to_use = models[0]
|
||||
# chat_payload = {
|
||||
# "model": model_to_use,
|
||||
# "messages": [
|
||||
# {"role": "system", "content": "You are a chat bot."},
|
||||
# {"role": "user", "content": "Hello, how are you?"},
|
||||
# ],
|
||||
# }
|
||||
# chat_response = self.client.post("chat/completions", json=chat_payload, headers=headers)
|
||||
# # Print or log the chat response if needed
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue