diff --git a/litellm/integrations/litedebugger.py b/litellm/integrations/litedebugger.py
index 5187d555f8..314fb7568b 100644
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -1,5 +1,5 @@
 import requests, traceback, json, os
-
+import types
 class LiteDebugger:
     user_email = None
@@ -7,13 +7,12 @@ class LiteDebugger:
     def __init__(self, email=None):
         self.api_url = "https://api.litellm.ai/debugger"
-        # self.api_url = "http://0.0.0.0:4000/debugger"
         self.validate_environment(email)
         pass
     def validate_environment(self, email):
         try:
-            self.user_email = os.getenv("LITELLM_EMAIL") or email
+            self.user_email = (email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL"))
             self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
             try:
                 print(
@@ -23,11 +22,11 @@ class LiteDebugger:
             print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
             if self.user_email == None:
                 raise Exception(
-                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= "
+                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= "
                 )
         except Exception as e:
             raise ValueError(
-                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= "
+                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. 
Eg.: os.environ['LITELLM_TOKEN']= " ) def input_log_event( @@ -36,6 +35,7 @@ class LiteDebugger: messages, end_user, litellm_call_id, + call_type, print_verbose, litellm_params, optional_params, @@ -52,39 +52,76 @@ class LiteDebugger: updated_litellm_params = remove_key_value(litellm_params, "logger_fn") - litellm_data_obj = { - "model": model, - "messages": messages, - "end_user": end_user, - "status": "initiated", - "litellm_call_id": litellm_call_id, - "user_email": self.user_email, - "litellm_params": updated_litellm_params, - "optional_params": optional_params, - } - print_verbose( - f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" - ) - response = requests.post( - url=self.api_url, - headers={"content-type": "application/json"}, - data=json.dumps(litellm_data_obj), - ) - print_verbose(f"LiteDebugger: api response - {response.text}") + if call_type == "embedding": + for message in messages: # assuming the input is a list as required by the embedding function + litellm_data_obj = { + "model": model, + "messages": [{"role": "user", "content": message}], + "end_user": end_user, + "status": "initiated", + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + "litellm_params": updated_litellm_params, + "optional_params": optional_params, + } + print_verbose( + f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" + ) + response = requests.post( + url=self.api_url, + headers={"content-type": "application/json"}, + data=json.dumps(litellm_data_obj), + ) + print_verbose(f"LiteDebugger: embedding api response - {response.text}") + elif call_type == "completion": + litellm_data_obj = { + "model": model, + "messages": messages if isinstance(messages, list) else [{"role": "user", "content": messages}], + "end_user": end_user, + "status": "initiated", + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + "litellm_params": updated_litellm_params, + "optional_params": optional_params, + } + print_verbose( + f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}" + ) + response = requests.post( + url=self.api_url, + headers={"content-type": "application/json"}, + data=json.dumps(litellm_data_obj), + ) + print_verbose(f"LiteDebugger: completion api response - {response.text}") except: print_verbose( f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}" ) pass - def post_call_log_event(self, original_response, litellm_call_id, print_verbose): + def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream): try: - litellm_data_obj = { - "status": "received", - "additional_details": {"original_response": original_response}, - "litellm_call_id": litellm_call_id, - "user_email": self.user_email, - } + if call_type == "embedding": + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": str(original_response["data"][0]["embedding"][:5])}, # don't store the entire vector + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + } + elif call_type == "completion" and not stream: + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": original_response}, + "litellm_call_id": litellm_call_id, + "user_email": self.user_email, + } + elif call_type == "completion" and stream: + litellm_data_obj = { + "status": "received", + "additional_details": {"original_response": "Streamed response" if isinstance(original_response, types.GeneratorType) else original_response}, + "litellm_call_id": 
litellm_call_id, + "user_email": self.user_email, + } response = requests.post( url=self.api_url, headers={"content-type": "application/json"}, @@ -98,32 +135,28 @@ class LiteDebugger: def log_event( self, - model, - messages, end_user, response_obj, start_time, end_time, litellm_call_id, print_verbose, + call_type, + stream = False ): try: print_verbose( - f"LiteLLMDebugger: Logging - Enters handler logging function for model {model} with response object {response_obj}" + f"LiteLLMDebugger: Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}" ) total_cost = 0 # [TODO] implement cost tracking response_time = (end_time - start_time).total_seconds() - if "choices" in response_obj: + if call_type == "completion" and stream == False: litellm_data_obj = { "response_time": response_time, - "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, - "response": response["choices"][0]["message"]["content"], - "end_user": end_user, + "response": response_obj["choices"][0]["message"]["content"], "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } print_verbose( f"LiteDebugger: Logging - final data object: {litellm_data_obj}" @@ -133,45 +166,26 @@ class LiteDebugger: headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj), ) - elif ( - "data" in response_obj - and isinstance(response_obj["data"], list) - and len(response_obj["data"]) > 0 - and "embedding" in response_obj["data"][0] - ): - print(f"messages: {messages}") + elif call_type == "embedding": litellm_data_obj = { "response_time": response_time, - "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, "response": str(response_obj["data"][0]["embedding"][:5]), - "end_user": end_user, "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } - print_verbose( - f"LiteDebugger: Logging - final data object: {litellm_data_obj}" - ) response = requests.post( url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj), ) - elif ( - isinstance(response_obj, object) - and response_obj.__class__.__name__ == "CustomStreamWrapper" - ): + elif call_type == "completion" and stream == True: litellm_data_obj = { "response_time": response_time, "total_cost": total_cost, - "messages": messages, - "response": "Streamed response", - "end_user": end_user, + "response": "streamed response", "litellm_call_id": litellm_call_id, "status": "success", - "user_email": self.user_email, } print_verbose( f"LiteDebugger: Logging - final data object: {litellm_data_obj}" @@ -188,7 +202,6 @@ class LiteDebugger: "response_time": response_time, "model": response_obj["model"], "total_cost": total_cost, - "messages": messages, "error": response_obj["error"], "end_user": end_user, "litellm_call_id": litellm_call_id, diff --git a/litellm/llms/ai21.py b/litellm/llms/ai21.py index 9b856be4c7..6a22b99e89 100644 --- a/litellm/llms/ai21.py +++ b/litellm/llms/ai21.py @@ -31,7 +31,7 @@ class AI21LLM: # set the api key if self.api_key == None: raise ValueError( - "Missing Baseten API Key - A call is being made to baseten but no key is set either in the environment variables or via params" + "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params" ) self.api_key = api_key self.headers = { diff --git a/litellm/main.py b/litellm/main.py index f20f173ff7..9ab017bbbd 
100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -92,6 +92,7 @@ def completion( custom_llm_provider=None, custom_api_base=None, litellm_call_id=None, + litellm_logging_obj=None, # model specific optional params # used by text-bison only top_k=40, @@ -100,6 +101,7 @@ def completion( ) -> ModelResponse: args = locals() try: + logging = litellm_logging_obj if fallbacks != []: return completion_with_fallbacks(**args) if litellm.model_alias_map and model in litellm.model_alias_map: @@ -151,12 +153,7 @@ def completion( litellm_call_id=litellm_call_id, model_alias_map=litellm.model_alias_map, ) - logging = Logging( - model=model, - messages=messages, - optional_params=optional_params, - litellm_params=litellm_params, - ) + logging.update_environment_variables(optional_params=optional_params, litellm_params=litellm_params) if custom_llm_provider == "azure": # azure configs openai.api_type = "azure" @@ -306,7 +303,7 @@ def completion( response = openai.Completion.create(model=model, prompt=prompt, **optional_params) if "stream" in optional_params and optional_params["stream"] == True: - response = CustomStreamWrapper(response, model) + response = CustomStreamWrapper(response, model, logging_obj=logging) return response ## LOGGING logging.post_call( @@ -363,7 +360,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, # let the stream handler know this is replicate - response = CustomStreamWrapper(output, "replicate") + response = CustomStreamWrapper(output, "replicate", logging_obj=logging) return response response = "" for item in output: @@ -413,7 +410,7 @@ def completion( ) if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, - response = CustomStreamWrapper(model_response, model) + response = CustomStreamWrapper(model_response, model, logging_obj=logging) return response response = model_response elif model in litellm.openrouter_models or custom_llm_provider == "openrouter": @@ -486,7 +483,7 @@ def completion( response = co.generate(model=model, prompt=prompt, **optional_params) if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, - response = CustomStreamWrapper(response, model) + response = CustomStreamWrapper(response, model, logging_obj=logging) return response ## LOGGING logging.post_call( @@ -532,7 +529,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, custom_llm_provider="huggingface" + model_response, model, custom_llm_provider="huggingface", logging_obj=logging ) return response response = model_response @@ -572,7 +569,7 @@ def completion( headers=headers, ) response = CustomStreamWrapper( - res.iter_lines(), model, custom_llm_provider="together_ai" + res.iter_lines(), model, custom_llm_provider="together_ai", logging_obj=logging ) return response else: @@ -689,7 +686,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, custom_llm_provider="ai21" + model_response, model, custom_llm_provider="ai21", logging_obj=logging ) return response @@ -732,7 +729,7 @@ def completion( if "stream" in optional_params and optional_params["stream"] == True: # don't try to access stream object, response = CustomStreamWrapper( - model_response, model, 
custom_llm_provider="baseten" + model_response, model, custom_llm_provider="baseten", logging_obj=logging ) return response response = model_response @@ -775,8 +772,6 @@ def completion( ) return response except Exception as e: - ## LOGGING - logging.post_call(input=messages, api_key=api_key, original_response=e) ## Map to OpenAI Exception raise exception_type( model=model, custom_llm_provider=custom_llm_provider, original_exception=e @@ -816,21 +811,12 @@ def batch_completion(*args, **kwargs): 60 ) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout` def embedding( - model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None + model, input=[], azure=False, force_timeout=60, litellm_call_id=None, litellm_logging_obj=None, logger_fn=None ): try: response = None - logging = Logging( - model=model, - messages=input, - optional_params={}, - litellm_params={ - "azure": azure, - "force_timeout": force_timeout, - "logger_fn": logger_fn, - "litellm_call_id": litellm_call_id, - }, - ) + logging = litellm_logging_obj + logging.update_environment_variables(optional_params={}, litellm_params={"force_timeout": force_timeout, "azure": azure, "litellm_call_id": litellm_call_id, "logger_fn": logger_fn}) if azure == True: # azure configs openai.api_type = "azure" @@ -849,7 +835,6 @@ def embedding( ) ## EMBEDDING CALL response = openai.Embedding.create(input=input, engine=model) - print_verbose(f"response_value: {str(response)[:100]}") elif model in litellm.open_ai_embedding_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" @@ -867,15 +852,13 @@ def embedding( ) ## EMBEDDING CALL response = openai.Embedding.create(input=input, model=model) - print_verbose(f"response_value: {str(response)[:100]}") else: args = locals() raise ValueError(f"No valid embedding model args passed in - {args}") - + ## LOGGING + logging.post_call(input=input, api_key=openai.api_key, original_response=response) return response except Exception as e: - ## LOGGING - logging.post_call(input=input, api_key=openai.api_key, original_response=e) ## Map to OpenAI Exception raise exception_type( model=model, diff --git a/litellm/tests/test_litedebugger_integration.py b/litellm/tests/test_litedebugger_integration.py index e40e694fe6..3eca24361c 100644 --- a/litellm/tests/test_litedebugger_integration.py +++ b/litellm/tests/test_litedebugger_integration.py @@ -1,24 +1,30 @@ -# #### What this tests #### -# # This tests if logging to the litedebugger integration actually works -# # pytest mistakes intentional bad calls as failed tests -> [TODO] fix this -# import sys, os -# import traceback -# import pytest +#### What this tests #### +# This tests if logging to the litedebugger integration actually works +# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this +import sys, os +import traceback +import pytest -# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path -# import litellm -# from litellm import embedding, completion +sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path +import litellm +from litellm import embedding, completion -# litellm.set_verbose = True +litellm.set_verbose = True -# litellm.email = "krrish@berri.ai" +litellm.use_client = True -# user_message = "Hello, how are you?" -# messages = [{ "content": user_message,"role": "user"}] +user_message = "Hello, how are you?" 
+messages = [{ "content": user_message,"role": "user"}] -# #openai call -# response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) +# Test 1: On completion call +response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) # print(f"response: {response}") -# #bad request call -# # response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}]) + +# # Test 2: On embedding call +# response = embedding(model="text-embedding-ada-002", input=["sample text"]) +# print(f"response: {response}") + +# # Test 3: On streaming completion call +response = completion(model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}], stream=True) +print(f"response: {response}") \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index bc97891251..fedf960c0a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -141,27 +141,41 @@ def install_and_import(package: str): ####### LOGGING ################### +from enum import Enum + +class CallTypes(Enum): + embedding = 'embedding' + completion = 'completion' + # Logging function -> log the exact model details + what's being sent | Non-Blocking class Logging: global supabaseClient, liteDebuggerClient - def __init__(self, model, messages, optional_params, litellm_params): + def __init__(self, model, messages, stream, call_type, litellm_call_id): + if call_type not in [item.value for item in CallTypes]: + allowed_values = ", ".join([item.value for item in CallTypes]) + raise ValueError(f"Invalid call_type {call_type}. Allowed values: {allowed_values}") self.model = model self.messages = messages + self.stream = stream + self.call_type = call_type + self.litellm_call_id = litellm_call_id + + def update_environment_variables(self, optional_params, litellm_params): self.optional_params = optional_params self.litellm_params = litellm_params self.logger_fn = litellm_params["logger_fn"] print_verbose(f"self.optional_params: {self.optional_params}") self.model_call_details = { - "model": model, - "messages": messages, + "model": self.model, + "messages": self.messages, "optional_params": self.optional_params, "litellm_params": self.litellm_params, } def pre_call(self, input, api_key, model=None, additional_args={}): try: - print_verbose(f"logging pre call for model: {self.model}") + print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") self.model_call_details["input"] = input self.model_call_details["api_key"] = api_key self.model_call_details["additional_args"] = additional_args @@ -215,6 +229,7 @@ class Logging: litellm_params=self.model_call_details["litellm_params"], optional_params=self.model_call_details["optional_params"], print_verbose=print_verbose, + call_type=self.call_type, ) except Exception as e: print_verbose( @@ -235,7 +250,7 @@ class Logging: if capture_exception: # log this error to sentry for debugging capture_exception(e) - def post_call(self, input, api_key, original_response, additional_args={}): + def post_call(self, original_response, input=None, api_key=None, additional_args={}): # Do something here try: self.model_call_details["input"] = input @@ -262,13 +277,13 @@ class Logging: try: if callback == "lite_debugger": print_verbose("reaches litedebugger for post-call logging!") - model = self.model_call_details["model"] - messages = 
self.model_call_details["input"]
                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
                        liteDebuggerClient.post_call_log_event(
                            original_response=original_response,
                            litellm_call_id=self.litellm_params["litellm_call_id"],
                            print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
                        )
                except:
                    print_verbose(
@@ -285,7 +300,72 @@ class Logging:
            )
            pass

-    # Add more methods as needed
+
+    def success_handler(self, result, start_time, end_time):
+        try:
+            for callback in litellm.success_callback:
+                try:
+                    if callback == "lite_debugger":
+                        print_verbose("reaches lite_debugger for logging!")
+                        print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                        print_verbose(f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}")
+                        liteDebuggerClient.log_event(
+                            end_user=litellm._thread_context.user,
+                            response_obj=result,
+                            start_time=start_time,
+                            end_time=end_time,
+                            litellm_call_id=self.litellm_call_id,
+                            print_verbose=print_verbose,
+                            call_type = self.call_type,
+                            stream = self.stream
+                        )
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}"
+                    )
+                    print_verbose(
+                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+                    )
+                    if capture_exception:  # log this error to sentry for debugging
+                        capture_exception(e)
+        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
+            )
+            pass
+
+    def failure_handler(self, exception, traceback_exception, start_time, end_time):
+        try:
+            for callback in litellm.failure_callback:
+                if callback == "lite_debugger":
+                    print_verbose("reaches lite_debugger for logging!")
+                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+                    result = {
+                        "model": self.model,
+                        "created": time.time(),
+                        "error": traceback_exception,
+                        "usage": {
+                            "prompt_tokens": prompt_token_calculator(
+                                self.model, messages=self.messages
+                            ),
+                            "completion_tokens": 0,
+                        },
+                    }
+                    liteDebuggerClient.log_event(
+                        end_user=litellm._thread_context.user,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        litellm_call_id=self.litellm_call_id,
+                        print_verbose=print_verbose,
+                        call_type = self.call_type,
+                        stream = self.stream
+                    )
+            pass
+        except:
+            pass


def exception_logging(
@@ -327,7 +407,7 @@ def client(original_function):
        *args, **kwargs
    ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
        try:
-            global callback_list, add_breadcrumb, user_logger_fn
+            global callback_list, add_breadcrumb, user_logger_fn, Logging
            if (
                litellm.email is not None
                or os.getenv("LITELLM_EMAIL", None) is not None
@@ -369,12 +449,22 @@ def client(original_function):
                )
            if "logger_fn" in kwargs:
                user_logger_fn = kwargs["logger_fn"]
-            # LOG SUCCESS
+            # CRASH REPORTING TELEMETRY
            crash_reporting(*args, **kwargs)
+            # INIT LOGGER - for user-specified integrations
+            model = args[0] if len(args) > 0 else kwargs["model"]
+            call_type = original_function.__name__
+            if call_type == CallTypes.completion.value:
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+            elif call_type == CallTypes.embedding.value:
+                messages = args[1] if len(args) > 1 else kwargs["input"]
+            stream = True if "stream" in kwargs and kwargs["stream"] == True else False
+            logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], call_type=call_type)
+            return logging_obj
        except:  # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}")
            pass
-
+
    def crash_reporting(*args, **kwargs):
        if litellm.telemetry:
            try:
@@ -397,10 +487,11 @@ def client(original_function):
    def wrapper(*args, **kwargs):
        start_time = None
        result = None
+        litellm_call_id = str(uuid.uuid4())
+        kwargs["litellm_call_id"] = litellm_call_id
+        logging_obj = function_setup(*args, **kwargs)
+        kwargs["litellm_logging_obj"] = logging_obj
        try:
-            function_setup(*args, **kwargs)
-            litellm_call_id = str(uuid.uuid4())
-            kwargs["litellm_call_id"] = litellm_call_id
            start_time = datetime.datetime.now()
            # [OPTIONAL] CHECK CACHE
            # remove this after deprecating litellm.caching
@@ -415,10 +506,13 @@ def client(original_function):
            # MODEL CALL
            result = original_function(*args, **kwargs)
+            end_time = datetime.datetime.now()
+            # LOG SUCCESS
+            logging_obj.success_handler(result, start_time, end_time)
+
            if "stream" in kwargs and kwargs["stream"] == True:
                # TODO: Add to cache for streaming
                return result
-            end_time = datetime.datetime.now()
            # [OPTIONAL] ADD TO CACHE
            if litellm.caching or litellm.caching_with_models or litellm.cache != None:  # user init a cache object
                litellm.cache.add_cache(result, *args, **kwargs)
@@ -433,6 +527,7 @@ def client(original_function):
            traceback_exception = traceback.format_exc()
            crash_reporting(*args, **kwargs, exception=traceback_exception)
            end_time = datetime.datetime.now()
+            logging_obj.failure_handler(e, traceback_exception, start_time, end_time)
            my_thread = threading.Thread(
                target=handle_failure,
                args=(e, traceback_exception, start_time, end_time, args, kwargs),
@@ -917,44 +1012,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                        litellm_call_id=kwargs["litellm_call_id"],
                        print_verbose=print_verbose,
                    )
-                elif callback == "lite_debugger":
-                    print_verbose("reaches lite_debugger for logging!")
-                    print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                    model = args[0] if len(args) > 0 else kwargs["model"]
-                    messages = (
-                        args[1]
-                        if len(args) > 1
-                        else kwargs.get(
-                            "messages",
-                            [
-                                {
-                                    "role": "user",
-                                    "content": " ".join(kwargs.get("input", "")),
-                                }
-                            ],
-                        )
-                    )
-                    result = {
-                        "model": model,
-                        "created": time.time(),
-                        "error": traceback_exception,
-                        "usage": {
-                            "prompt_tokens": prompt_token_calculator(
-                                model, messages=messages
-                            ),
-                            "completion_tokens": 0,
-                        },
-                    }
-                    liteDebuggerClient.log_event(
-                        model=model,
-                        messages=messages,
-                        end_user=litellm._thread_context.user,
-                        response_obj=result,
-                        start_time=start_time,
-                        end_time=end_time,
-                        litellm_call_id=kwargs["litellm_call_id"],
-                        print_verbose=print_verbose,
-                    )
        except:
            print_verbose(
                f"Error Occurred while logging failure: {traceback.format_exc()}"
            )
@@ -1085,32 +1142,6 @@ def handle_success(args, kwargs, result, start_time, end_time):
                    litellm_call_id=kwargs["litellm_call_id"],
                    print_verbose=print_verbose,
                )
-            elif callback == "lite_debugger":
-                print_verbose("reaches lite_debugger for logging!")
-                print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
-                messages = (
-                    args[1]
-                    if len(args) > 1
-                    else kwargs.get(
-                        "messages",
-                        [
-                            {
-                                "role": "user",
-                                "content": " ".join(kwargs.get("input", "")),
-                            }
-                        ],
-                    )
-                )
-                liteDebuggerClient.log_event(
-                    model=model,
-                    messages=messages,
-                    end_user=litellm._thread_context.user,
-                    response_obj=result,
-                    start_time=start_time,
-                    end_time=end_time,
-                    litellm_call_id=kwargs["litellm_call_id"],
-                    print_verbose=print_verbose,
-                )
        except Exception as e:
            # LOGGING
            exception_logging(logger_fn=user_logger_fn, exception=e)
@@ -1486,9 +1517,10 @@ def get_secret(secret_name):
# wraps the completion stream to return the correct format for the model
# replicate/anthropic/cohere
class CustomStreamWrapper:
-    def __init__(self, completion_stream, model, custom_llm_provider=None):
+    def __init__(self, completion_stream, model, custom_llm_provider=None, logging_obj=None):
        self.model = model
        self.custom_llm_provider = custom_llm_provider
+        self.logging_obj = logging_obj
        if model in litellm.cohere_models:
            # cohere does not return an iterator, so we need to wrap it in one
            self.completion_stream = iter(completion_stream)
@@ -1497,6 +1529,10 @@ class CustomStreamWrapper:
    def __iter__(self):
        return self
+
+    def logging(self, text):
+        if self.logging_obj:
+            self.logging_obj.post_call(text)
    def handle_anthropic_chunk(self, chunk):
        str_line = chunk.decode("utf-8")  # Convert bytes to string
@@ -1586,6 +1622,8 @@ class CustomStreamWrapper:
            elif self.model in litellm.open_ai_text_completion_models:
                chunk = next(self.completion_stream)
                completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+            # LOGGING
+            self.logging(completion_obj["content"])
            # return this for all models
            return {"choices": [{"delta": completion_obj}]}
diff --git a/pyproject.toml b/pyproject.toml
index 9f625122f5..d332b4e36a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.495"
+version = "0.1.496"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
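
# --- Illustrative sketch (not part of the patch above) ---
# A minimal, self-contained example of the pattern this diff introduces: the
# @client decorator builds one Logging object per call, stores it in
# kwargs["litellm_logging_obj"], and the wrapped function (and any stream
# wrapper) reuses it instead of constructing its own logger. Names mirror the
# diff (CallTypes, Logging, litellm_call_id, litellm_logging_obj); the bodies
# are simplified stand-ins, not litellm's actual implementation.
import datetime
import uuid
from enum import Enum

class CallTypes(Enum):
    embedding = "embedding"
    completion = "completion"

class Logging:
    def __init__(self, model, messages, stream, call_type, litellm_call_id):
        # reject call types the debugger integrations don't know about
        if call_type not in [item.value for item in CallTypes]:
            raise ValueError(f"Invalid call_type {call_type}")
        self.model, self.messages, self.stream = model, messages, stream
        self.call_type, self.litellm_call_id = call_type, litellm_call_id

    def success_handler(self, result, start_time, end_time):
        elapsed = (end_time - start_time).total_seconds()
        print(f"[{self.call_type}] {self.model} ok in {elapsed:.3f}s (stream={self.stream})")

    def failure_handler(self, exception, traceback_exception, start_time, end_time):
        print(f"[{self.call_type}] {self.model} failed: {exception}")

def client(original_function):
    def wrapper(*args, **kwargs):
        # one call id + one logging object per call, shared with the wrapped function
        kwargs["litellm_call_id"] = str(uuid.uuid4())
        logging_obj = Logging(
            model=args[0] if len(args) > 0 else kwargs["model"],
            messages=kwargs.get("messages") or kwargs.get("input"),
            stream=kwargs.get("stream", False),
            call_type=original_function.__name__,
            litellm_call_id=kwargs["litellm_call_id"],
        )
        kwargs["litellm_logging_obj"] = logging_obj
        start_time = datetime.datetime.now()
        try:
            result = original_function(*args, **kwargs)
            logging_obj.success_handler(result, start_time, datetime.datetime.now())
            return result
        except Exception as e:
            logging_obj.failure_handler(e, "traceback", start_time, datetime.datetime.now())
            raise
    return wrapper

@client
def completion(model, messages, stream=False, litellm_call_id=None, litellm_logging_obj=None):
    # a real provider call would pass litellm_logging_obj into CustomStreamWrapper when stream=True
    return {"choices": [{"message": {"content": "hi"}}]}

if __name__ == "__main__":
    print(completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hey"}]))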