forked from phoenix/litellm-mirror
(fix) remove bloat - rate limit manager
This commit is contained in:
parent cac320f74e
commit 224a0b9367
1 changed file with 0 additions and 81 deletions
@@ -1,81 +0,0 @@
#### What this tests ####
# This tests litellm's RateLimitManager by sending concurrent completions
# and a batch of completion jobs through a shared per-minute rate limit.

import sys, os
import traceback
import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from openai.error import Timeout
import litellm
from litellm import batch_completion, batch_completion_models, completion, batch_completion_models_all_responses

# litellm.set_verbose=True


@pytest.mark.asyncio
async def test_rate_limit_handler():
    import asyncio

    ##### USAGE ################
    from litellm import RateLimitManager

    handler = RateLimitManager(
        max_requests_per_minute=60,
        max_tokens_per_minute=200,
    )

    async def send_request():
        response = await handler.acompletion(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "content": "Please provide a summary of the latest scientific discoveries." * 10,
                    "role": "user",
                }
            ],
        )
        print("got a response", response)
        return response

    # Fire four concurrent requests through the shared rate limiter.
    tasks = []
    for _ in range(4):
        tasks.append(send_request())

    responses = await asyncio.gather(*tasks)

    for response in responses:
        print(response)


# import asyncio
# asyncio.run(
#     test_rate_limit_handler()
# )


@pytest.mark.asyncio
async def test_rate_limit_handler_batch():
    ##### USAGE ################
    jobs = [
        {"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}]},
        {"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}]},
    ]

    from litellm import RateLimitManager

    handler = RateLimitManager(
        max_requests_per_minute=60,
        max_tokens_per_minute=20000,
    )

    try:
        # batch_completion is a coroutine on RateLimitManager, so it must
        # be awaited; a bare call would only create an unawaited coroutine.
        await handler.batch_completion(
            jobs=jobs,
            api_key=os.environ["OPENAI_API_KEY"],
        )
    except Exception as e:
        print(e)


if __name__ == "__main__":
    import asyncio

    # Run the batch test directly when this file is executed as a script.
    asyncio.run(test_rate_limit_handler_batch())
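Both deleted tests exercise the same primitive: a per-minute request budget and token budget shared across concurrent coroutines. Since the RateLimitManager source itself is gone from this fork, here is a minimal sketch of that idea; the MinuteRateLimiter class and its acquire method below are hypothetical illustrations, not litellm APIs.

import asyncio
import time


class MinuteRateLimiter:
    """Admit a request only when both the request budget and the token
    budget have room in the current one-minute window."""

    def __init__(self, max_requests_per_minute: int, max_tokens_per_minute: int):
        self.max_requests = max_requests_per_minute
        self.max_tokens = max_tokens_per_minute
        self.window_start = time.monotonic()
        self.requests_used = 0
        self.tokens_used = 0
        self._lock = asyncio.Lock()

    async def acquire(self, tokens: int) -> None:
        """Block until one more request costing `tokens` fits in the window."""
        while True:
            async with self._lock:
                now = time.monotonic()
                if now - self.window_start >= 60:
                    # New minute: reset both budgets.
                    self.window_start = now
                    self.requests_used = 0
                    self.tokens_used = 0
                if (self.requests_used < self.max_requests
                        and self.tokens_used + tokens <= self.max_tokens):
                    self.requests_used += 1
                    self.tokens_used += tokens
                    return
                wait = 60 - (now - self.window_start)
            # Sleep outside the lock so other coroutines can check the window.
            await asyncio.sleep(max(wait, 0.05))


async def main():
    # Same limits as the first deleted test: 60 requests/min, 200 tokens/min.
    limiter = MinuteRateLimiter(max_requests_per_minute=60, max_tokens_per_minute=200)

    async def send(i: int):
        await limiter.acquire(tokens=50)  # rough per-request token estimate
        print(f"request {i} admitted")    # a real caller would hit the API here

    # Four 50-token requests exactly fill the 200-token budget, so all pass.
    await asyncio.gather(*(send(i) for i in range(4)))


if __name__ == "__main__":
    asyncio.run(main())

A fixed one-minute window is the simplest scheme to show here; a continuous token-bucket refill smooths bursts better and is what most production limiters use.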