diff --git a/litellm/tests/test_proxy_server_keys.py b/litellm/tests/test_proxy_server_keys.py
index 7fad89e68d..73dbf24a31 100644
--- a/litellm/tests/test_proxy_server_keys.py
+++ b/litellm/tests/test_proxy_server_keys.py
@@ -14,7 +14,6 @@ import pytest, logging
 import litellm
 from litellm import embedding, completion, completion_cost, Timeout
 from litellm import RateLimitError
-from httpx import AsyncClient
 
 # Configure logging
 logging.basicConfig(
@@ -30,7 +29,6 @@ from litellm.proxy.proxy_server import (
     router,
     save_worker_config,
     startup_event,
-    asyncio,
 )  # Replace with the actual module where your FastAPI router is defined
 
 filepath = os.path.dirname(os.path.abspath(__file__))
@@ -53,50 +51,27 @@ save_worker_config(
     save=False,
     use_queue=False,
 )
+app = FastAPI()
+app.include_router(router)  # Include your router in the test app
 
-import asyncio
-
-
-# @pytest.fixture
-# def event_loop():
-#     """Create an instance of the default event loop for each test case."""
-#     policy = asyncio.WindowsSelectorEventLoopPolicy()
-#     res = policy.new_event_loop()
-#     asyncio.set_event_loop(res)
-#     res._close = res.close
-#     res.close = lambda: None
-
-#     yield res
-
-#     res._close()
+
+@app.on_event("startup")
+async def wrapper_startup_event():
+    await startup_event()
 
 
 # Here you create a fixture that will be used by your tests
 # Make sure the fixture returns TestClient(app)
-@pytest.fixture(scope="function")
-async def client():
-    from litellm.proxy.proxy_server import (
-        cleanup_router_config_variables,
-        initialize,
-        ProxyLogging,
-        proxy_logging_obj,
-    )
+@pytest.fixture(autouse=True)
+def client():
+    from litellm.proxy.proxy_server import cleanup_router_config_variables
 
-    cleanup_router_config_variables()  # rest proxy before test
-    proxy_logging_obj = ProxyLogging(user_api_key_cache={})
-    proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
-
-    await initialize(config=config_fp, debug=True)
-    app = FastAPI()
-    app.include_router(router)  # Include your router in the test app
-    async with AsyncClient(app=app, base_url="http://testserver") as client:
+    cleanup_router_config_variables()
+    with TestClient(app) as client:
         yield client
 
 
-@pytest.mark.parametrize("anyio_backend", ["asyncio"])
-@pytest.mark.anyio
-async def test_add_new_key(client):
+def test_add_new_key(client):
     try:
         # Your test data
         test_data = {
@@ -104,18 +79,18 @@ async def test_add_new_key(client):
             "aliases": {"mistral-7b": "gpt-3.5-turbo"},
             "duration": "20m",
         }
-        print("testing proxy server - test_add_new_key")
+        print("testing proxy server")
         # Your bearer token
         token = os.getenv("PROXY_MASTER_KEY")
 
         headers = {"Authorization": f"Bearer {token}"}
-        response = await client.post("/key/generate", json=test_data, headers=headers)
+        response = client.post("/key/generate", json=test_data, headers=headers)
         print(f"response: {response.text}")
         assert response.status_code == 200
         result = response.json()
         assert result["key"].startswith("sk-")
 
-        async def _post_data():
+        def _post_data():
             json_data = {
                 "model": "azure-model",
                 "messages": [
@@ -125,22 +100,20 @@ async def test_add_new_key(client):
                     }
                 ],
             }
-            response = await client.post(
+            response = client.post(
                 "/chat/completions",
                 json=json_data,
                 headers={"Authorization": f"Bearer {result['key']}"},
             )
             return response
 
-        await _post_data()
+        _post_data()
         print(f"Received response: {result}")
     except Exception as e:
         pytest.fail(f"LiteLLM Proxy test failed. Exception: {str(e)}")
 
 
-@pytest.mark.parametrize("anyio_backend", ["asyncio"])
-@pytest.mark.anyio
-async def test_update_new_key(client):
+def test_update_new_key(client):
     try:
         # Your test data
         test_data = {
@@ -148,99 +121,34 @@
             "aliases": {"mistral-7b": "gpt-3.5-turbo"},
             "duration": "20m",
         }
-        print("testing proxy server-test_update_new_key")
+        print("testing proxy server")
         # Your bearer token
         token = os.getenv("PROXY_MASTER_KEY")
 
         headers = {"Authorization": f"Bearer {token}"}
-        response = await client.post("/key/generate", json=test_data, headers=headers)
+        response = client.post("/key/generate", json=test_data, headers=headers)
         print(f"response: {response.text}")
         assert response.status_code == 200
         result = response.json()
         assert result["key"].startswith("sk-")
 
-        async def _post_data():
+        def _post_data():
             json_data = {"models": ["bedrock-models"], "key": result["key"]}
-            response = await client.post("/key/update", json=json_data, headers=headers)
+            response = client.post("/key/update", json=json_data, headers=headers)
             print(f"response text: {response.text}")
             assert response.status_code == 200
             return response
 
-        await _post_data()
+        _post_data()
         print(f"Received response: {result}")
     except Exception as e:
         pytest.fail(f"LiteLLM Proxy test failed. Exception: {str(e)}")
 
 
-# Run the test - only runs via pytest
+# # Run the test - only runs via pytest
 
 
-@pytest.mark.parametrize("anyio_backend", ["asyncio"])
-@pytest.mark.anyio
-async def test_add_new_key_max_parallel_limit(client):
-    try:
-        import anyio
-
-        print("ANY IO BACKENDS")
-        print(anyio.get_all_backends())
-        # Your test data
-        test_data = {
-            "duration": "20m",
-            "max_parallel_requests": 1,
-            "metadata": {"type": "ishaan-test"},
-        }
-        # Your bearer token
-        token = os.getenv("PROXY_MASTER_KEY")
-
-        headers = {"Authorization": f"Bearer {token}"}
-
-        response = await client.post("/key/generate", json=test_data, headers=headers)
-        print(f"response: {response.text}")
-        assert response.status_code == 200
-        result = response.json()
-
-        async def _post_data():
-            json_data = {
-                "model": "azure-model",
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": f"this is a test request, write a short poem {time.time()}",
-                    }
-                ],
-            }
-
-            response = await client.post(
-                "/chat/completions",
-                json=json_data,
-                headers={"Authorization": f"Bearer {result['key']}"},
-            )
-            return response
-
-        async def _run_in_parallel():
-            try:
-                futures = [_post_data() for _ in range(2)]
-                responses = await asyncio.gather(*futures)
-                print("response1 status: ", responses[0].status_code)
-                print("response2 status: ", responses[1].status_code)
-
-                if any(response.status_code == 429 for response in responses):
-                    pass
-                else:
-                    raise Exception()
-            except Exception as e:
-                pass
-
-        await _run_in_parallel()
-
-        # assert responses[0].status_code == 200 or responses[1].status_code == 200
-    except Exception as e:
-        pytest.fail(f"LiteLLM Proxy test failed. Exception: {str(e)}")
-
-
-@pytest.mark.parametrize("anyio_backend", ["asyncio"])
-@pytest.mark.anyio
-async def test_add_new_key_max_parallel_limit_streaming(client):
+def test_add_new_key_max_parallel_limit(client):
     try:
         # Your test data
         test_data = {"duration": "20m", "max_parallel_requests": 1}
@@ -248,12 +156,60 @@ async def test_add_new_key_max_parallel_limit_streaming(client):
         token = os.getenv("PROXY_MASTER_KEY")
 
         headers = {"Authorization": f"Bearer {token}"}
-        response = await client.post("/key/generate", json=test_data, headers=headers)
+        response = client.post("/key/generate", json=test_data, headers=headers)
         print(f"response: {response.text}")
         assert response.status_code == 200
         result = response.json()
 
-        async def _post_data():
+        def _post_data():
+            json_data = {
+                "model": "azure-model",
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": f"this is a test request, write a short poem {time.time()}",
+                    }
+                ],
+            }
+            response = client.post(
+                "/chat/completions",
+                json=json_data,
+                headers={"Authorization": f"Bearer {result['key']}"},
+            )
+            return response
+
+        def _run_in_parallel():
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                future1 = executor.submit(_post_data)
+                future2 = executor.submit(_post_data)
+
+                # Obtain the results from the futures
+                response1 = future1.result()
+                response2 = future2.result()
+                if response1.status_code == 429 or response2.status_code == 429:
+                    pass
+                else:
+                    raise Exception()
+
+        _run_in_parallel()
+    except Exception as e:
+        pytest.fail(f"LiteLLM Proxy test failed. Exception: {str(e)}")
+
+
+def test_add_new_key_max_parallel_limit_streaming(client):
+    try:
+        # Your test data
+        test_data = {"duration": "20m", "max_parallel_requests": 1}
+        # Your bearer token
+        token = os.getenv("PROXY_MASTER_KEY")
+
+        headers = {"Authorization": f"Bearer {token}"}
+        response = client.post("/key/generate", json=test_data, headers=headers)
+        print(f"response: {response.text}")
+        assert response.status_code == 200
+        result = response.json()
+
+        def _post_data():
             json_data = {
                 "model": "azure-model",
                 "messages": [
@@ -264,26 +220,25 @@ async def test_add_new_key_max_parallel_limit_streaming(client):
                     }
                 ],
                 "stream": True,
             }
-            response = await client.post(
+            response = client.post(
                 "/chat/completions",
                 json=json_data,
                 headers={"Authorization": f"Bearer {result['key']}"},
             )
             return response
 
-        async def _run_in_parallel():
-            try:
-                futures = [_post_data() for _ in range(2)]
-                responses = await asyncio.gather(*futures)
-                print("response1 status: ", responses[0].status_code)
-                print("response2 status: ", responses[1].status_code)
+        def _run_in_parallel():
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                future1 = executor.submit(_post_data)
+                future2 = executor.submit(_post_data)
 
-                if any(response.status_code == 429 for response in responses):
+                # Obtain the results from the futures
+                response1 = future1.result()
+                response2 = future2.result()
+                if response1.status_code == 429 or response2.status_code == 429:
                     pass
                 else:
                     raise Exception()
-            except Exception as e:
-                pass
 
         _run_in_parallel()
     except Exception as e:
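
Note on the pattern above: the revert swaps httpx.AsyncClient/anyio for FastAPI's synchronous TestClient, and replaces asyncio.gather with a ThreadPoolExecutor to fire concurrent requests. A minimal self-contained sketch of that pattern follows; the toy app and /ping route are hypothetical stand-ins, not part of the litellm proxy:

    from concurrent.futures import ThreadPoolExecutor
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()  # hypothetical stand-in for the proxy's FastAPI app

    @app.get("/ping")
    def ping():
        return {"status": "ok"}

    def test_parallel_requests():
        # TestClient drives the app in-process (no running server needed);
        # entering the `with` block fires the app's startup events, which is
        # why the diff wraps startup_event() in an @app.on_event handler.
        with TestClient(app) as client:
            with ThreadPoolExecutor(max_workers=2) as executor:
                # Submit two requests concurrently, mirroring _run_in_parallel.
                futures = [executor.submit(client.get, "/ping") for _ in range(2)]
                responses = [f.result() for f in futures]
        assert all(r.status_code == 200 for r in responses)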