litellm-mirror/litellm/proxy/tests/load_test_completion.py

import time, asyncio
from openai import AsyncOpenAI
import uuid


litellm_client = AsyncOpenAI(
    api_key="test",
    base_url="http://0.0.0.0:8000"
)

async def litellm_completion():
  return await litellm_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}],
        )


async def main():
    start = time.time()
    n = 1  # Number of concurrent tasks
    tasks = [litellm_completion() for _ in range(n)]
    chat_completions = await asyncio.gather(*tasks)
    successful_completions = [c for c in chat_completions if c is not None]
    print(n, time.time() - start, len(successful_completions))

if __name__ == "__main__":
    asyncio.run(main())