(fix) add some better load testing

Ishaan Jaff 2024-03-22 19:45:24 -07:00
parent 9bd53cec6a
commit c6ffd456ff
6 changed files with 270 additions and 14 deletions


@@ -0,0 +1,20 @@
# Use the official Python image as the base image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the Python requirements file
COPY requirements.txt .

# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Expose the port the app will run on
EXPOSE 8090

# Start the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]


@@ -0,0 +1,59 @@
# import sys, os
# sys.path.insert(
# 0, os.path.abspath("../")
# ) # Adds the parent directory to the system path
from fastapi import FastAPI, Request, status, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware
import uuid
import litellm

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

litellm_router = litellm.Router(
    model_list=[
        {
            "model_name": "anything",  # model alias -> load balance across deployments with the same `model_name`
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "openai/anything",  # actual model name
                "api_key": "sk-1234",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
            },
        }
    ]
)


# for completion
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    # this proxy forwards a fixed prompt through the litellm Router (OpenAI SDK
    # under the hood) to the mock endpoint configured above
    response = await litellm_router.acompletion(
        model="anything",
        messages=[
            {
                "role": "user",
                "content": "hello who are you",
            }
        ],
    )
    return response


if __name__ == "__main__":
    import uvicorn

    # run this on 8090, 8091, 8092 and 8093
    uvicorn.run(app, host="0.0.0.0", port=8090)
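With copies of this app running on ports 8090-8093 as the comment above suggests, a simple way to drive load is to fan concurrent requests out across the instances. A rough sketch, not part of this commit, assuming the openai Python SDK (>=1.0) is installed and all four instances are up; the port list, request count, and helper names are illustrative:

import asyncio
import time

from openai import AsyncOpenAI

# one client per running instance; the api_key is a dummy value, nothing validates it
PORTS = [8090, 8091, 8092, 8093]
clients = [
    AsyncOpenAI(base_url=f"http://localhost:{port}", api_key="sk-1234")
    for port in PORTS
]


async def one_request(client: AsyncOpenAI):
    # the app ignores the prompt and returns the mock endpoint's canned response
    return await client.chat.completions.create(
        model="anything",
        messages=[{"role": "user", "content": "hello who are you"}],
    )


async def run_load_test(total_requests: int = 200):
    start = time.time()
    # round-robin the requests across the instances and fire them concurrently
    tasks = [one_request(clients[i % len(clients)]) for i in range(total_requests)]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    failures = [r for r in results if isinstance(r, Exception)]
    elapsed = time.time() - start
    print(
        f"{total_requests} requests in {elapsed:.2f}s "
        f"({total_requests / elapsed:.1f} req/s), {len(failures)} failures"
    )


if __name__ == "__main__":
    asyncio.run(run_load_test())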