diff --git a/litellm/proxy/proxy_load_test/litellm_router_proxy/Dockerfile b/litellm/proxy/proxy_load_test/litellm_router_proxy/Dockerfile
new file mode 100644
index 000000000..f5787f0da
--- /dev/null
+++ b/litellm/proxy/proxy_load_test/litellm_router_proxy/Dockerfile
@@ -0,0 +1,20 @@
+# Use the official Python image as the base image
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the Python requirements file
+COPY requirements.txt .
+
+# Install the Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the application code
+COPY . .
+
+# Expose the port the app will run on
+EXPOSE 8090
+
+# Start the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8090"]
\ No newline at end of file
diff --git a/litellm/proxy/proxy_load_test/litellm_router_proxy/main.py b/litellm/proxy/proxy_load_test/litellm_router_proxy/main.py
new file mode 100644
index 000000000..95e2abc15
--- /dev/null
+++ b/litellm/proxy/proxy_load_test/litellm_router_proxy/main.py
@@ -0,0 +1,59 @@
+# import sys, os
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path
+from fastapi import FastAPI, Request, status, HTTPException, Depends
+from fastapi.responses import StreamingResponse
+from fastapi.security import OAuth2PasswordBearer
+from fastapi.middleware.cors import CORSMiddleware
+import uuid
+import litellm
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+litellm_router = litellm.Router(
+    model_list=[
+        {
+            "model_name": "anything",  # model alias -> loadbalance between models with same `model_name`
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "openai/anything",  # actual model name
+                "api_key": "sk-1234",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+            },
+        }
+    ]
+)
+
+
+# for completion
+@app.post("/chat/completions")
+@app.post("/v1/chat/completions")
+async def completion(request: Request):
+    # this proxy uses the litellm Router to call a fixed endpoint
+
+    response = await litellm_router.acompletion(
+        model="anything",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you",
+            }
+        ],
+    )
+
+    return response
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # run this on 8090, 8091, 8092 and 8093
+    uvicorn.run(app, host="0.0.0.0", port=8090)
diff --git a/litellm/proxy/proxy_load_test/simple_litellm_proxy.py b/litellm/proxy/proxy_load_test/simple_litellm_proxy.py
new file mode 100644
index 000000000..003c89c77
--- /dev/null
+++ b/litellm/proxy/proxy_load_test/simple_litellm_proxy.py
@@ -0,0 +1,54 @@
+# import sys, os
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path
+from fastapi import FastAPI, Request, status, HTTPException, Depends
+from fastapi.responses import StreamingResponse
+from fastapi.security import OAuth2PasswordBearer
+from fastapi.middleware.cors import CORSMiddleware
+import uuid
+import litellm
+import openai
+from openai import AsyncOpenAI
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+litellm_client = AsyncOpenAI(
+    base_url="https://exampleopenaiendpoint-production.up.railway.app/",
+    api_key="sk-1234",
+)
+
+
+# for completion
+@app.post("/chat/completions")
+@app.post("/v1/chat/completions")
+async def completion(request: Request):
+    # this proxy uses litellm.acompletion (with an AsyncOpenAI client) to call a fixed endpoint
+
+    response = await litellm.acompletion(
+        model="openai/anything",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you",
+            }
+        ],
+        client=litellm_client,
+    )
+
+    return response
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # run this on 8090, 8091, 8092 and 8093
+    uvicorn.run(app, host="0.0.0.0", port=8090)
diff --git a/litellm/proxy/proxy_load_test/simple_litellm_router_proxy.py b/litellm/proxy/proxy_load_test/simple_litellm_router_proxy.py
new file mode 100644
index 000000000..95e2abc15
--- /dev/null
+++ b/litellm/proxy/proxy_load_test/simple_litellm_router_proxy.py
@@ -0,0 +1,59 @@
+# import sys, os
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path
+from fastapi import FastAPI, Request, status, HTTPException, Depends
+from fastapi.responses import StreamingResponse
+from fastapi.security import OAuth2PasswordBearer
+from fastapi.middleware.cors import CORSMiddleware
+import uuid
+import litellm
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+litellm_router = litellm.Router(
+    model_list=[
+        {
+            "model_name": "anything",  # model alias -> loadbalance between models with same `model_name`
+            "litellm_params": {  # params for litellm completion/embedding call
+                "model": "openai/anything",  # actual model name
+                "api_key": "sk-1234",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+            },
+        }
+    ]
+)
+
+
+# for completion
+@app.post("/chat/completions")
+@app.post("/v1/chat/completions")
+async def completion(request: Request):
+    # this proxy uses the litellm Router to call a fixed endpoint
+
+    response = await litellm_router.acompletion(
+        model="anything",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you",
+            }
+        ],
+    )
+
+    return response
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # run this on 8090, 8091, 8092 and 8093
+    uvicorn.run(app, host="0.0.0.0", port=8090)
diff --git a/litellm/proxy/proxy_load_test/simple_proxy.py b/litellm/proxy/proxy_load_test/simple_proxy.py
new file mode 100644
index 000000000..12fb6cffb
--- /dev/null
+++ b/litellm/proxy/proxy_load_test/simple_proxy.py
@@ -0,0 +1,52 @@
+# import sys, os
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path
+from fastapi import FastAPI, Request, status, HTTPException, Depends
+from fastapi.responses import StreamingResponse
+from fastapi.security import OAuth2PasswordBearer
+from fastapi.middleware.cors import CORSMiddleware
+import uuid
+import openai
+from openai import AsyncOpenAI
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+litellm_client = AsyncOpenAI(
+    base_url="https://exampleopenaiendpoint-production.up.railway.app/",
+    api_key="sk-1234",
+)
+
+
+# for completion
+@app.post("/chat/completions")
+@app.post("/v1/chat/completions")
+async def completion(request: Request):
+    # this proxy uses the OpenAI SDK to call a fixed endpoint
+
+    response = await litellm_client.chat.completions.create(
+        model="anything",
+        messages=[
+            {
+                "role": "user",
+                "content": "hello who are you",
+            }
+        ],
+    )
+
+    return response
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # run this on 8090, 8091, 8092 and 8093
+    uvicorn.run(app, host="0.0.0.0", port=8090)
diff --git a/litellm/proxy/tests/load_test_completion.py b/litellm/proxy/tests/load_test_completion.py
index 9450c1cb5..29d8924ab 100644
--- a/litellm/proxy/tests/load_test_completion.py
+++ b/litellm/proxy/tests/load_test_completion.py
@@ -1,56 +1,68 @@
-import time, asyncio, os
+import time
+import asyncio
+import os
 from openai import AsyncOpenAI, AsyncAzureOpenAI
 import uuid
 import traceback
 from large_text import text
 from dotenv import load_dotenv
+from statistics import mean, median
 
-litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")
+litellm_client = AsyncOpenAI(base_url="http://0.0.0.0:4000/", api_key="sk-1234")
 
 
 async def litellm_completion():
-    # Your existing code for litellm_completion goes here
     try:
+        start_time = time.time()
         response = await litellm_client.chat.completions.create(
-            model="fake_openai",
+            model="fake-openai-endpoint",
             messages=[
                 {
                     "role": "user",
-                    "content": f"{text}. Who was alexander the great? {uuid.uuid4()}",
+                    "content": f"This is a test{uuid.uuid4()}",
                 }
             ],
             user="my-new-end-user-1",
        )
-        return response
+        end_time = time.time()
+        latency = end_time - start_time
+        print("response time=", latency)
+        return response, latency
     except Exception as e:
-        # If there's an exception, log the error message
         with open("error_log.txt", "a") as error_log:
             error_log.write(f"Error during completion: {str(e)}\n")
-        pass
+        return None, 0
 
 
 async def main():
-    for i in range(3):
+    latencies = []
+    for i in range(5):
         start = time.time()
-        n = 10  # Number of concurrent tasks
+        n = 100  # Number of concurrent tasks
         tasks = [litellm_completion() for _ in range(n)]
 
         chat_completions = await asyncio.gather(*tasks)
 
-        successful_completions = [c for c in chat_completions if c is not None]
+        successful_completions = [c for c, l in chat_completions if c is not None]
+        completion_latencies = [l for c, l in chat_completions if c is not None]
+        latencies.extend(completion_latencies)
 
-        # Write errors to error_log.txt
         with open("error_log.txt", "a") as error_log:
-            for completion in chat_completions:
+            for completion, latency in chat_completions:
                 if isinstance(completion, str):
                     error_log.write(completion + "\n")
 
         print(n, time.time() - start, len(successful_completions))
 
+    if latencies:
+        average_latency = mean(latencies)
+        median_latency = median(latencies)
+        print(f"Average Latency per Response: {average_latency} seconds")
+        print(f"Median Latency per Response: {median_latency} seconds")
+
 
 if __name__ == "__main__":
-    # Blank out contents of error_log.txt
     open("error_log.txt", "w").close()
 
     asyncio.run(main())