"""Load-test script: send concurrent chat-completion requests and time them.

File path per the diff header: litellm/proxy/tests/load_test_completion.py.
The client targets ``http://0.0.0.0:8000``; presumably a locally running
LiteLLM proxy is expected to be listening there -- confirm before running.
"""
import asyncio
import sys
import time
import uuid

from openai import AsyncOpenAI

# One shared async client is reused by every request issued by this script.
litellm_client = AsyncOpenAI(
    api_key="test",
    base_url="http://0.0.0.0:8000",
)


async def litellm_completion():
    """Send a single chat-completion request and return the response.

    Each request embeds a fresh UUID in the prompt so that no two requests
    carry identical content (presumably to defeat response caching -- verify).

    Raises:
        Whatever the OpenAI client raises when the request fails; the caller
        decides how failures are counted.
    """
    return await litellm_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
    )


async def main(n: int = 1) -> None:
    """Run ``n`` completion requests concurrently and print a summary.

    Prints ``n``, the elapsed wall time in seconds, and the number of
    successful completions to stdout. If any request failed, a short
    summary is also written to stderr.

    Args:
        n: Number of concurrent tasks to launch.
    """
    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start = time.perf_counter()
    tasks = [litellm_completion() for _ in range(n)]

    # return_exceptions=True keeps one failed request from aborting the whole
    # run; failures come back as exception objects in the result list.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    elapsed = time.perf_counter() - start

    failures = [r for r in results if isinstance(r, BaseException)]
    successful_completions = [
        r for r in results if r is not None and not isinstance(r, BaseException)
    ]
    print(n, elapsed, len(successful_completions))

    if failures:
        print(
            f"{len(failures)} request(s) failed; first error: {failures[0]!r}",
            file=sys.stderr,
        )


if __name__ == "__main__":
    asyncio.run(main())