From 16e1070dbe62c630e89a27373328ec9adae0c824 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 Nov 2023 12:47:28 -0800 Subject: [PATCH] test: refactor testing order --- litellm/proxy/tests/test_openai_request.py | 26 ++++++++-------- litellm/router.py | 9 +++++- ...cooldowns.py => test_acooldowns_router.py} | 31 +++++++++++-------- litellm/tests/test_router.py | 8 +++-- 4 files changed, 45 insertions(+), 29 deletions(-) rename litellm/tests/{test_router_cooldowns.py => test_acooldowns_router.py} (84%) diff --git a/litellm/proxy/tests/test_openai_request.py b/litellm/proxy/tests/test_openai_request.py index 97f89c232c..f49fce7bb9 100644 --- a/litellm/proxy/tests/test_openai_request.py +++ b/litellm/proxy/tests/test_openai_request.py @@ -1,15 +1,15 @@ -import openai -client = openai.OpenAI( - api_key="anything", - base_url="http://0.0.0.0:8000" -) +# import openai +# client = openai.OpenAI( +# api_key="anything", +# base_url="http://0.0.0.0:8000" +# ) -# request sent to model set on litellm proxy, `litellm --model` -response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ - { - "role": "user", - "content": "this is a test request, write a short poem" - } -]) +# # request sent to model set on litellm proxy, `litellm --model` +# response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [ +# { +# "role": "user", +# "content": "this is a test request, write a short poem" +# } +# ]) -print(response) +# print(response) diff --git a/litellm/router.py b/litellm/router.py index 6c0ddbcb8a..bd77f50f75 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -761,4 +761,11 @@ class Router: raise ValueError("No models available.") def flush_cache(self): - self.cache.flush_cache() \ No newline at end of file + self.cache.flush_cache() + + def reset(self): + ## clean up on close + litellm.success_callback = [] + litellm.failure_callback = [] + self.flush_cache() + \ No newline at end of file diff --git a/litellm/tests/test_router_cooldowns.py b/litellm/tests/test_acooldowns_router.py similarity index 84% rename from litellm/tests/test_router_cooldowns.py rename to litellm/tests/test_acooldowns_router.py index 0fb1369c57..0c50c686f8 100644 --- a/litellm/tests/test_router_cooldowns.py +++ b/litellm/tests/test_acooldowns_router.py @@ -35,13 +35,6 @@ model_list = [{ # list of model deployments } ] -router = Router(model_list=model_list, - redis_host=os.getenv("REDIS_HOST"), - redis_password=os.getenv("REDIS_PASSWORD"), - redis_port=int(os.getenv("REDIS_PORT")), # type: ignore - routing_strategy="simple-shuffle", - set_verbose=True, - num_retries=1) # type: ignore kwargs = {"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hey, how's it going?"}],} @@ -49,17 +42,23 @@ def test_multiple_deployments_sync(): import concurrent, time litellm.set_verbose=False results = [] - + router = Router(model_list=model_list, + redis_host=os.getenv("REDIS_HOST"), + redis_password=os.getenv("REDIS_PASSWORD"), + redis_port=int(os.getenv("REDIS_PORT")), # type: ignore + routing_strategy="simple-shuffle", + set_verbose=True, + num_retries=1) # type: ignore try: - router.flush_cache() + router.reset() for _ in range(3): response = router.completion(**kwargs) results.append(response) print(results) - router.flush_cache() + router.reset() except Exception as e: print(f"FAILED TEST!") - pytest.fail(f"An error occurred - {str(e)}") + pytest.fail(f"An error occurred - {traceback.format_exc()}") # test_multiple_deployments_sync() @@ -69,7 +68,13 @@ def 
test_multiple_deployments_parallel(): results = [] futures = {} start_time = time.time() - router.flush_cache() + router = Router(model_list=model_list, + redis_host=os.getenv("REDIS_HOST"), + redis_password=os.getenv("REDIS_PASSWORD"), + redis_port=int(os.getenv("REDIS_PORT")), # type: ignore + routing_strategy="simple-shuffle", + set_verbose=True, + num_retries=1) # type: ignore # Assuming you have an executor instance defined somewhere in your code with concurrent.futures.ThreadPoolExecutor() as executor: for _ in range(5): @@ -96,4 +101,4 @@ def test_multiple_deployments_parallel(): # Assuming litellm, router, and executor are defined somewhere in your code -test_multiple_deployments_parallel() \ No newline at end of file +# test_multiple_deployments_parallel() \ No newline at end of file diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 120fe53ced..cb9e37a61d 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -65,7 +65,7 @@ def test_exception_raising(): except openai.AuthenticationError: print("Test Passed: Caught an OPENAI AUTH Error, Good job. This is what we needed!") os.environ["AZURE_API_KEY"] = old_api_key - router.flush_cache() + router.reset() except Exception as e: os.environ["AZURE_API_KEY"] = old_api_key print("Got unexpected exception on router!", e) @@ -112,7 +112,7 @@ def test_reading_key_from_model_list(): ] ) os.environ["AZURE_API_KEY"] = old_api_key - router.flush_cache() + router.reset() except Exception as e: os.environ["AZURE_API_KEY"] = old_api_key print(f"FAILED TEST") @@ -161,6 +161,7 @@ def test_function_calling(): router = Router(model_list=model_list, routing_strategy="latency-based-routing") response = router.completion(model="gpt-3.5-turbo-0613", messages=messages, functions=functions) + router.reset() print(response) def test_acompletion_on_router(): @@ -209,6 +210,7 @@ def test_acompletion_on_router(): assert len(response1.choices[0].message.content) > 0 assert response1.choices[0].message.content == response2.choices[0].message.content asyncio.run(get_response()) + router.reset() except litellm.Timeout as e: end_time = time.time() print(f"timeout error occurred: {end_time - start_time}") @@ -262,6 +264,7 @@ def test_function_calling_on_router(): ] response = router.completion(model="gpt-3.5-turbo", messages=messages, functions=function1) print(f"final returned response: {response}") + router.reset() assert isinstance(response["choices"][0]["message"]["function_call"], dict) except Exception as e: print(f"An exception occurred: {e}") @@ -288,6 +291,7 @@ def test_aembedding_on_router(): input=["good morning from litellm", "this is another item"], ) print(response) + router.reset() asyncio.run(embedding_call()) except Exception as e: traceback.print_exc()
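
Illustrative usage of the refactor above (a sketch, not part of the applied diff): the cooldowns test now builds its Router inside the test body instead of at module scope, and cleans up with the new Router.reset() helper rather than calling flush_cache() directly. The test name, the single deployment entry, and the OPENAI_API_KEY environment variable below are assumptions made so the example is self-contained; the real model_list lives in litellm/tests/test_acooldowns_router.py.

import os
import pytest
import litellm
from litellm import Router

# Illustrative single deployment; the actual test file defines its own model_list.
model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "gpt-3.5-turbo",
        "api_key": os.getenv("OPENAI_API_KEY"),
    },
}]

def test_simple_shuffle_routing():
    # Build the Router inside the test (not at import time), as the renamed
    # cooldowns test now does, so each test starts from a clean state.
    router = Router(model_list=model_list,
                    routing_strategy="simple-shuffle",
                    set_verbose=True,
                    num_retries=1)
    try:
        response = router.completion(model="gpt-3.5-turbo",
                                     messages=[{"role": "user", "content": "Hey, how's it going?"}])
        print(response)
    except Exception as e:
        pytest.fail(f"An error occurred - {e}")
    finally:
        # reset() (added to litellm/router.py in this patch) clears
        # litellm.success_callback / litellm.failure_callback and flushes the
        # router cache, replacing the bare flush_cache() calls the tests used
        # before this commit.
        router.reset()

Constructing the Router per test, as in the sketch, is what lets each test own its callback and cache state; reset() in a finally block keeps a failing test from leaking that state into the next one.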