diff --git a/litellm/router.py b/litellm/router.py
index c01c7d42e..e1522719e 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1222,11 +1222,14 @@ class Router:
             raise ValueError("No models available.")
 
     def flush_cache(self):
+        litellm.cache = None
         self.cache.flush_cache()
 
     def reset(self):
         ## clean up on close
         litellm.success_callback = []
+        litellm.__async_success_callback = []
         litellm.failure_callback = []
+        litellm._async_failure_callback = []
         self.flush_cache()
\ No newline at end of file
diff --git a/litellm/tests/langfuse.log b/litellm/tests/langfuse.log
index f1c2195df..8596eab6b 100644
Binary files a/litellm/tests/langfuse.log and b/litellm/tests/langfuse.log differ
diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml
index 1af6e6e6d..e8125010d 100644
--- a/litellm/tests/test_configs/test_config_no_auth.yaml
+++ b/litellm/tests/test_configs/test_config_no_auth.yaml
@@ -19,3 +19,27 @@ model_list:
   model_info:
     description: this is a test openai model
   model_name: test_openai_models
+- litellm_params:
+    model: gpt-3.5-turbo
+  model_info:
+    description: this is a test openai model
+    id: 56f1bd94-3b54-4b67-9ea2-7c70e9a3a709
+  model_name: test_openai_models
+- litellm_params:
+    model: gpt-3.5-turbo
+  model_info:
+    description: this is a test openai model
+    id: 4d1ee26c-abca-450c-8744-8e87fd6755e9
+  model_name: test_openai_models
+- litellm_params:
+    model: gpt-3.5-turbo
+  model_info:
+    description: this is a test openai model
+    id: 00e19c0f-b63d-42bb-88e9-016fb0c60764
+  model_name: test_openai_models
+- litellm_params:
+    model: gpt-3.5-turbo
+  model_info:
+    description: this is a test openai model
+    id: 79fc75bf-8e1b-47d5-8d24-9365a854af03
+  model_name: test_openai_models
diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index 5a26cd244..d24cf5757 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -1,13 +1,12 @@
 ### What this tests ####
-import sys, os, time, inspect, asyncio
+import sys, os, time, inspect, asyncio, traceback
 import pytest
 sys.path.insert(0, os.path.abspath('../..'))
 from litellm import completion, embedding
 import litellm
 from litellm.integrations.custom_logger import CustomLogger
-
-async_success = False
+
 class MyCustomHandler(CustomLogger):
     complete_streaming_response_in_callback = ""
     def __init__(self):
@@ -51,8 +50,6 @@ class MyCustomHandler(CustomLogger):
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Async success")
         self.async_success = True
-        print("Value of async success: ", self.async_success)
-        print("\n kwargs: ", kwargs)
         if kwargs.get("model") == "text-embedding-ada-002":
             self.async_success_embedding = True
             self.async_embedding_kwargs = kwargs
@@ -64,8 +61,6 @@ class MyCustomHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Async Failure")
         self.async_failure = True
-        print("Value of async failure: ", self.async_failure)
-        print("\n kwargs: ", kwargs)
         if kwargs.get("model") == "text-embedding-ada-002":
             self.async_failure_embedding = True
             self.async_embedding_kwargs_fail = kwargs
@@ -218,11 +213,26 @@ def test_azure_completion_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_async_custom_handler():
-    try:
-        customHandler2 = MyCustomHandler()
-        litellm.callbacks = [customHandler2]
-        litellm.set_verbose = True
+@pytest.mark.asyncio
+async def test_async_custom_handler_completion():
+    try:
+        customHandler_success = MyCustomHandler()
+        customHandler_failure = MyCustomHandler()
+        # success
+        assert customHandler_success.async_success == False
+        litellm.callbacks = [customHandler_success]
+        response = await litellm.acompletion(
+                            model="gpt-3.5-turbo",
+                            messages=[{
+                                "role": "user",
+                                "content": "hello from litellm test",
+                            }]
+                        )
+        await asyncio.sleep(1)
+        assert customHandler_success.async_success == True, "async success is not set to True even after success"
+        assert customHandler_success.async_completion_kwargs.get("model") == "gpt-3.5-turbo"
+        # failure
+        litellm.callbacks = [customHandler_failure]
         messages = [
             {"role": "system", "content": "You are a helpful assistant."},
             {
@@ -230,75 +240,57 @@ def test_async_custom_handler():
                 "content": "how do i kill someone",
             },
         ]
-        async def test_1():
-            try:
-                response = await litellm.acompletion(
-                    model="gpt-3.5-turbo",
-                    messages=messages,
-                    api_key="test",
-                )
-            except:
-                pass
-            assert customHandler2.async_failure == False
-        asyncio.run(test_1())
-        assert customHandler2.async_failure == True, "async failure is not set to True even after failure"
-        assert customHandler2.async_completion_kwargs_fail.get("model") == "gpt-3.5-turbo"
-        assert len(str(customHandler2.async_completion_kwargs_fail.get("exception"))) > 10 # exppect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
-        print("Passed setting async failure")
-
-        async def test_2():
+        assert customHandler_failure.async_failure == False
+        try:
             response = await litellm.acompletion(
-                model="gpt-3.5-turbo",
-                messages=[{
-                    "role": "user",
-                    "content": "hello from litellm test",
-                }]
-            )
-            print("\n response", response)
-            assert customHandler2.async_success == False
-        asyncio.run(test_2())
-        assert customHandler2.async_success == True, "async success is not set to True even after success"
-        assert customHandler2.async_completion_kwargs.get("model") == "gpt-3.5-turbo"
+                                model="gpt-3.5-turbo",
+                                messages=messages,
+                                api_key="my-bad-key",
+                            )
+        except:
+            pass
+        assert customHandler_failure.async_failure == True, "async failure is not set to True even after failure"
+        assert customHandler_failure.async_completion_kwargs_fail.get("model") == "gpt-3.5-turbo"
+        assert len(str(customHandler_failure.async_completion_kwargs_fail.get("exception"))) > 10 # expect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
+        litellm.callbacks = []
+        print("Passed setting async failure")
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+# asyncio.run(test_async_custom_handler_completion())
 
-
-        async def test_3():
-            response = await litellm.aembedding(
+@pytest.mark.asyncio
+async def test_async_custom_handler_embedding():
+    try:
+        customHandler_embedding = MyCustomHandler()
+        litellm.callbacks = [customHandler_embedding]
+        # success
+        assert customHandler_embedding.async_success_embedding == False
+        response = await litellm.aembedding(
                 model="text-embedding-ada-002",
                 input = ["hello world"],
             )
-            print("\n response", response)
-            assert customHandler2.async_success_embedding == False
-        asyncio.run(test_3())
-        assert customHandler2.async_success_embedding == True, "async_success_embedding is not set to True even after success"
-        assert customHandler2.async_embedding_kwargs.get("model") == "text-embedding-ada-002"
-        assert customHandler2.async_embedding_response["usage"]["prompt_tokens"] ==2
+        await asyncio.sleep(1)
+        assert customHandler_embedding.async_success_embedding == True, "async_success_embedding is not set to True even after success"
+        assert customHandler_embedding.async_embedding_kwargs.get("model") == "text-embedding-ada-002"
+        assert customHandler_embedding.async_embedding_response["usage"]["prompt_tokens"] ==2
         print("Passed setting async success: Embedding")
-
-
-        print("Testing custom failure callback for embedding")
-
-        async def test_4():
-            try:
-                response = await litellm.aembedding(
-                    model="text-embedding-ada-002",
-                    input = ["hello world"],
-                    api_key="test",
-                )
-            except:
-                pass
-
-            assert customHandler2.async_failure_embedding == False
-        asyncio.run(test_4())
-        assert customHandler2.async_failure_embedding == True, "async failure embedding is not set to True even after failure"
-        assert customHandler2.async_embedding_kwargs_fail.get("model") == "text-embedding-ada-002"
-        assert len(str(customHandler2.async_embedding_kwargs_fail.get("exception"))) > 10 # exppect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
-        print("Passed setting async failure")
-
+        # failure
+        assert customHandler_embedding.async_failure_embedding == False
+        try:
+            response = await litellm.aembedding(
+                model="text-embedding-ada-002",
+                input = ["hello world"],
+                api_key="my-bad-key",
+            )
+        except:
+            pass
+        assert customHandler_embedding.async_failure_embedding == True, "async failure embedding is not set to True even after failure"
+        assert customHandler_embedding.async_embedding_kwargs_fail.get("model") == "text-embedding-ada-002"
+        assert len(str(customHandler_embedding.async_embedding_kwargs_fail.get("exception"))) > 10 # exppect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
     except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_async_custom_handler()
-
+        pytest.fail(f"An exception occurred - {str(e)}")
+asyncio.run(test_async_custom_handler_embedding())
 
 from litellm import Cache
 def test_redis_cache_completion_stream():
     # Important Test - This tests if we can add to streaming cache, when custom callbacks are set
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 4ec91ec0d..57711082d 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -507,7 +507,6 @@ def test_aembedding_on_router():
             model="text-embedding-ada-002",
             input=["good morning from litellm 2"],
         )
-        print("sync embedding response: ", response)
         router.reset()
     except Exception as e:
         traceback.print_exc()
diff --git a/litellm/utils.py b/litellm/utils.py
index efc146413..2aff72812 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -964,7 +964,7 @@ class Logging:
                         end_time=end_time,
                         print_verbose=print_verbose,
                     )
-                if callback == "cache":
+                if callback == "cache" and litellm.cache is not None:
                     # this only logs streaming once, complete_streaming_response exists i.e when stream ends
                     print_verbose("success_callback: reaches cache for logging!")
                     kwargs = self.model_call_details
@@ -1052,7 +1052,7 @@ class Logging:
         start_time, end_time, result = self._success_handler_helper_fn(start_time=start_time, end_time=end_time, result=result)
         for callback in litellm._async_success_callback:
             try:
-                if callback == "cache":
+                if callback == "cache" and litellm.cache is not None:
                     # set_cache once complete streaming response is built
                     print_verbose("async success_callback: reaches cache for logging!")
                     kwargs = self.model_call_details
@@ -1238,7 +1238,7 @@ class Logging:
                         print_verbose=print_verbose,
                         callback_func=callback
                     )
-            except:
+            except Exception as e:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
                 )
@@ -1649,6 +1649,19 @@ def client(original_function):
                 result._response_ms = (end_time - start_time).total_seconds() * 1000 # return response latency in ms like openai
             return result
         except Exception as e:
+            traceback_exception = traceback.format_exc()
+            crash_reporting(*args, **kwargs, exception=traceback_exception)
+            end_time = datetime.datetime.now()
+            if logging_obj:
+                try:
+                    logging_obj.failure_handler(e, traceback_exception, start_time, end_time) # DO NOT MAKE THREADED - router retry fallback relies on this!
+                except Exception as e:
+                    raise e
+                try:
+                    await logging_obj.async_failure_handler(e, traceback_exception, start_time, end_time)
+                except Exception as e:
+                    raise e
+
             call_type = original_function.__name__
             if call_type == CallTypes.acompletion.value:
                 num_retries = (
@@ -1658,27 +1671,24 @@ def client(original_function):
                 )
                 litellm.num_retries = None # set retries to None to prevent infinite loops
                 context_window_fallback_dict = kwargs.get("context_window_fallback_dict", {})
-                
+
                 if num_retries:
-                    kwargs["num_retries"] = num_retries
-                    kwargs["original_function"] = original_function
-                    if (isinstance(e, openai.RateLimitError)): # rate limiting specific error
-                        kwargs["retry_strategy"] = "exponential_backoff_retry"
-                    elif (isinstance(e, openai.APIError)): # generic api error
-                        kwargs["retry_strategy"] = "constant_retry"
-                    return await litellm.acompletion_with_retries(*args, **kwargs)
+                    try:
+                        kwargs["num_retries"] = num_retries
+                        kwargs["original_function"] = original_function
+                        if (isinstance(e, openai.RateLimitError)): # rate limiting specific error
+                            kwargs["retry_strategy"] = "exponential_backoff_retry"
+                        elif (isinstance(e, openai.APIError)): # generic api error
+                            kwargs["retry_strategy"] = "constant_retry"
+                        return await litellm.acompletion_with_retries(*args, **kwargs)
+                    except:
+                        pass
                 elif isinstance(e, litellm.exceptions.ContextWindowExceededError) and context_window_fallback_dict and model in context_window_fallback_dict:
                     if len(args) > 0:
                         args[0] = context_window_fallback_dict[model]
                     else:
                         kwargs["model"] = context_window_fallback_dict[model]
                     return await original_function(*args, **kwargs)
-            traceback_exception = traceback.format_exc()
-            crash_reporting(*args, **kwargs, exception=traceback_exception)
-            end_time = datetime.datetime.now()
-            if logging_obj:
-                logging_obj.failure_handler(e, traceback_exception, start_time, end_time) # DO NOT MAKE THREADED - router retry fallback relies on this!
-                await logging_obj.async_failure_handler(e, traceback_exception, start_time, end_time)
             raise e
 
     is_coroutine = inspect.iscoroutinefunction(original_function)