(fix) add some better load testing

Ishaan Jaff 2024-03-22 19:45:24 -07:00
parent 48b9250a3d
commit 311918b99c
6 changed files with 270 additions and 14 deletions

View file

@@ -0,0 +1,20 @@
# Use the official Python image as the base image
FROM python:3.9-slim
# Set the working directory in the container
WORKDIR /app
# Copy the Python requirements file
COPY requirements.txt .
# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code
COPY . .
# Expose the port the app will run on
EXPOSE 8090
# Start the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]

View file

@@ -0,0 +1,59 @@
# import sys, os
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path
from fastapi import FastAPI, Request, status, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware
import uuid
import litellm
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
litellm_router = litellm.Router(
model_list=[
{
"model_name": "anything", # model alias -> loadbalance between models with same `model_name`
"litellm_params": { # params for litellm completion/embedding call
"model": "openai/anything", # actual model name
"api_key": "sk-1234",
"api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
},
}
]
)
# for completion
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    # this proxy uses a litellm Router to call a fixed fake endpoint
response = await litellm_router.acompletion(
model="anything",
messages=[
{
"role": "user",
"content": "hello who are you",
}
],
)
return response
if __name__ == "__main__":
import uvicorn
# run this on 8090, 8091, 8092 and 8093
uvicorn.run(app, host="0.0.0.0", port=8090)
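As a quick sanity check, here is a minimal sketch of one way to hit this proxy once it is running. The port 8090 comes from the code above; the client script itself (the `sk-1234` placeholder key and the single test prompt) is an assumption for illustration, not part of this commit.

# sketch: send one OpenAI-style request to the proxy assumed to be running on port 8090
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI(base_url="http://0.0.0.0:8090", api_key="sk-1234")  # any key works; it is not checked


async def smoke_test():
    # the endpoint above ignores the incoming payload and forwards a fixed request,
    # so any OpenAI-shaped body is enough to verify the proxy responds
    response = await client.chat.completions.create(
        model="anything",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(smoke_test())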

View file

@@ -0,0 +1,54 @@
# import sys, os
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path
from fastapi import FastAPI, Request, status, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware
import uuid
import litellm
import openai
from openai import AsyncOpenAI
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
litellm_client = AsyncOpenAI(
base_url="https://exampleopenaiendpoint-production.up.railway.app/",
api_key="sk-1234",
)
# for completion
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    # this proxy uses litellm.acompletion with a preconfigured OpenAI client to call a fixed fake endpoint
response = await litellm.acompletion(
model="openai/anything",
messages=[
{
"role": "user",
"content": "hello who are you",
}
],
client=litellm_client,
)
return response
if __name__ == "__main__":
import uvicorn
# run this on 8090, 8091, 8092 and 8093
uvicorn.run(app, host="0.0.0.0", port=8090)

View file

@@ -0,0 +1,59 @@
# import sys, os
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path
from fastapi import FastAPI, Request, status, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware
import uuid
import litellm
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
litellm_router = litellm.Router(
model_list=[
{
"model_name": "anything", # model alias -> loadbalance between models with same `model_name`
"litellm_params": { # params for litellm completion/embedding call
"model": "openai/anything", # actual model name
"api_key": "sk-1234",
"api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
},
}
]
)
# for completion
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    # this proxy uses a litellm Router to call a fixed fake endpoint
response = await litellm_router.acompletion(
model="anything",
messages=[
{
"role": "user",
"content": "hello who are you",
}
],
)
return response
if __name__ == "__main__":
import uvicorn
# run this on 8090, 8091, 8092 and 8093
uvicorn.run(app, host="0.0.0.0", port=8090)

View file

@@ -0,0 +1,52 @@
# import sys, os
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path
from fastapi import FastAPI, Request, status, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware
import uuid
import openai
from openai import AsyncOpenAI
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
litellm_client = AsyncOpenAI(
base_url="https://exampleopenaiendpoint-production.up.railway.app/",
api_key="sk-1234",
)
# for completion
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
# this proxy uses the OpenAI SDK to call a fixed endpoint
response = await litellm_client.chat.completions.create(
model="anything",
messages=[
{
"role": "user",
"content": "hello who are you",
}
],
)
return response
if __name__ == "__main__":
import uvicorn
# run this on 8090, 8091, 8092 and 8093
uvicorn.run(app, host="0.0.0.0", port=8090)

View file

@@ -1,56 +1,68 @@
-import time, asyncio, os
+import time
+import asyncio
+import os
 from openai import AsyncOpenAI, AsyncAzureOpenAI
 import uuid
 import traceback
 from large_text import text
 from dotenv import load_dotenv
+from statistics import mean, median

-litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")
+litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:4000/", api_key="sk-1234")


async def litellm_completion():
    # Your existing code for litellm_completion goes here
    try:
+        start_time = time.time()
        response = await litellm_client.chat.completions.create(
-            model="fake_openai",
+            model="fake-openai-endpoint",
            messages=[
                {
                    "role": "user",
-                    "content": f"{text}. Who was alexander the great? {uuid.uuid4()}",
+                    "content": f"This is a test{uuid.uuid4()}",
                }
            ],
            user="my-new-end-user-1",
        )
-        return response
+        end_time = time.time()
+        latency = end_time - start_time
+        print("response time=", latency)
+        return response, latency

    except Exception as e:
        # If there's an exception, log the error message
        with open("error_log.txt", "a") as error_log:
            error_log.write(f"Error during completion: {str(e)}\n")
-        pass
+        return None, 0


async def main():
-    for i in range(3):
+    latencies = []
+    for i in range(5):
        start = time.time()
-        n = 10  # Number of concurrent tasks
+        n = 100  # Number of concurrent tasks
        tasks = [litellm_completion() for _ in range(n)]

        chat_completions = await asyncio.gather(*tasks)

-        successful_completions = [c for c in chat_completions if c is not None]
+        successful_completions = [c for c, l in chat_completions if c is not None]
+        completion_latencies = [l for c, l in chat_completions if c is not None]
+        latencies.extend(completion_latencies)

        # Write errors to error_log.txt
        with open("error_log.txt", "a") as error_log:
-            for completion in chat_completions:
+            for completion, latency in chat_completions:
                if isinstance(completion, str):
                    error_log.write(completion + "\n")

        print(n, time.time() - start, len(successful_completions))

+    if latencies:
+        average_latency = mean(latencies)
+        median_latency = median(latencies)
+        print(f"Average Latency per Response: {average_latency} seconds")
+        print(f"Median Latency per Response: {median_latency} seconds")


if __name__ == "__main__":
    # Blank out contents of error_log.txt
    open("error_log.txt", "w").close()

    asyncio.run(main())
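If the intent is to measure the overhead of the small fake proxies added above rather than a LiteLLM proxy on port 4000, only the client construction in this script needs to change. A sketch under that assumption (port 8090 taken from the files above; the rest of the script is left untouched, since those endpoints ignore the request body):

# sketch: re-point the same load test at one of the fake proxies from this commit
litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:8090", api_key="sk-1234")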