From 4f9120553058f620000b9e1b5e506462f44dd3f7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 15 Jun 2024 10:57:20 -0700 Subject: [PATCH] refactor(utils.py): refactor Logging to it's own class. Cut down utils.py to <10k lines. Easier debugging Reference: https://github.com/BerriAI/litellm/issues/4206 --- .pre-commit-config.yaml | 14 +- litellm/__init__.py | 8 +- litellm/_logging.py | 9 +- litellm/cost_calculator.py | 221 +- litellm/litellm_core_utils/core_helpers.py | 41 + litellm/litellm_core_utils/litellm_logging.py | 3215 +++++++++++++++++ litellm/litellm_core_utils/redact_messages.py | 4 +- litellm/llms/anthropic.py | 8 +- litellm/llms/base.py | 2 +- litellm/llms/bedrock.py | 16 +- litellm/llms/bedrock_httpx.py | 72 +- litellm/llms/databricks.py | 4 +- litellm/llms/predibase.py | 4 +- litellm/llms/triton.py | 1 - litellm/llms/vertex_ai.py | 3 +- litellm/llms/vertex_ai_anthropic.py | 3 +- litellm/llms/vertex_httpx.py | 7 +- litellm/proxy/utils.py | 6 +- litellm/types/utils.py | 910 +++++ litellm/utils.py | 2932 +-------------- 20 files changed, 4517 insertions(+), 2963 deletions(-) create mode 100644 litellm/litellm_core_utils/core_helpers.py create mode 100644 litellm/litellm_core_utils/litellm_logging.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d85031b5..74f165bdd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,10 +24,10 @@ repos: language: system types: [python] files: ^litellm/ - # - id: check-file-length - # name: Check file length - # entry: python check_file_length.py - # args: ["10000"] # set your desired maximum number of lines - # language: python - # files: litellm/.*\.py - # exclude: ^litellm/tests/ \ No newline at end of file + - id: check-file-length + name: Check file length + entry: python check_file_length.py + args: ["10000"] # set your desired maximum number of lines + language: python + files: litellm/.*\.py + exclude: ^litellm/tests/ \ No newline at end of file diff --git a/litellm/__init__.py b/litellm/__init__.py index 6ecf70d0d..4c9baac19 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -723,12 +723,10 @@ from .utils import ( token_counter, create_pretrained_tokenizer, create_tokenizer, - cost_per_token, supports_function_calling, supports_parallel_function_calling, supports_vision, get_litellm_params, - Logging, acreate, get_model_list, get_max_tokens, @@ -748,9 +746,10 @@ from .utils import ( get_first_chars_messages, ModelResponse, ImageResponse, - ImageObject, get_provider_fields, ) + +from .types.utils import ImageObject from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig @@ -827,4 +826,5 @@ from .router import Router from .assistants.main import * from .batches.main import * from .scheduler import * -from .cost_calculator import response_cost_calculator +from .cost_calculator import response_cost_calculator, cost_per_token +from litellm.litellm_core_utils.litellm_logging import Logging diff --git a/litellm/_logging.py b/litellm/_logging.py index ab7a08f97..52a445b49 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -3,10 +3,17 @@ from logging import Formatter import traceback set_verbose = False + +if set_verbose is True: + logging.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." 
+ ) json_logs = bool(os.getenv("JSON_LOGS", False)) # Create a handler for the logger (you may need to adapt this based on your needs) +log_level = os.getenv("LITELLM_LOG", "ERROR") +numeric_level: str = getattr(logging, log_level.upper()) handler = logging.StreamHandler() -handler.setLevel(logging.DEBUG) +handler.setLevel(numeric_level) class JsonFormatter(Formatter): diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index d1e2dab52..c84df53e8 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -1,6 +1,6 @@ # What is this? ## File for 'response_cost' calculation in Logging -from typing import Optional, Union, Literal, List +from typing import Optional, Union, Literal, List, Tuple import litellm._logging from litellm.utils import ( ModelResponse, @@ -9,7 +9,6 @@ from litellm.utils import ( TranscriptionResponse, TextCompletionResponse, CallTypes, - cost_per_token, print_verbose, CostPerToken, token_counter, @@ -18,6 +17,224 @@ import litellm from litellm import verbose_logger +def _cost_per_token_custom_pricing_helper( + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Optional[Tuple[float, float]]: + """Internal helper function for calculating cost, if custom pricing given""" + if custom_cost_per_token is None and custom_cost_per_second is None: + return None + + if custom_cost_per_token is not None: + input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens + output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens + return input_cost, output_cost + elif custom_cost_per_second is not None: + output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore + return 0, output_cost + + return None + + +def cost_per_token( + model: str = "", + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + custom_llm_provider=None, + region_name=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Tuple[float, float]: + """ + Calculates the cost per token for a given model, prompt tokens, and completion tokens. + + Parameters: + model (str): The name of the model to use. Default is "" + prompt_tokens (int): The number of tokens in the prompt. + completion_tokens (int): The number of tokens in the completion. + response_time (float): The amount of time, in milliseconds, it took the call to complete. + custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) + custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. + custom_cost_per_second: Optional[float]: the cost per second for the llm api call. + + Returns: + tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. + """ + if model is None: + raise Exception("Invalid arg. 
Model cannot be none.") + ## CUSTOM PRICING ## + response_cost = _cost_per_token_custom_pricing_helper( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + response_time_ms=response_time_ms, + custom_cost_per_second=custom_cost_per_second, + custom_cost_per_token=custom_cost_per_token, + ) + if response_cost is not None: + return response_cost[0], response_cost[1] + + # given + prompt_tokens_cost_usd_dollar: float = 0 + completion_tokens_cost_usd_dollar: float = 0 + model_cost_ref = litellm.model_cost + model_with_provider = model + if custom_llm_provider is not None: + model_with_provider = custom_llm_provider + "/" + model + if region_name is not None: + model_with_provider_and_region = ( + f"{custom_llm_provider}/{region_name}/{model}" + ) + if ( + model_with_provider_and_region in model_cost_ref + ): # use region based pricing, if it's available + model_with_provider = model_with_provider_and_region + + model_without_prefix = model + model_parts = model.split("/") + if len(model_parts) > 1: + model_without_prefix = model_parts[1] + else: + model_without_prefix = model + """ + Code block that formats model to lookup in litellm.model_cost + Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 + Option2. model = "openai/gpt-4" - model = provider/model + Option3. model = "anthropic.claude-3" - model = model + """ + if ( + model_with_provider in model_cost_ref + ): # Option 2. use model with provider, model = "openai/gpt-4" + model = model_with_provider + elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" + model = model + elif ( + model_without_prefix in model_cost_ref + ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" + model = model_without_prefix + + # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models + print_verbose(f"Looking up model={model} in model_cost_map") + if model in model_cost_ref: + print_verbose(f"Success: model={model} in model_cost_map") + print_verbose( + f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" + ) + if ( + model_cost_ref[model].get("input_cost_per_token", None) is not None + and model_cost_ref[model].get("output_cost_per_token", None) is not None + ): + ## COST PER TOKEN ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + elif ( + model_cost_ref[model].get("output_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = 0 + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_second"] + * response_time_ms + / 1000 + ) + elif ( + model_cost_ref[model].get("input_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 + ) + completion_tokens_cost_usd_dollar = 0.0 + print_verbose( + f"Returned 
custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:gpt-3.5-turbo" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:davinci-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:davinci-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:babbage-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:babbage-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_llms: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") + model = litellm.azure_llms[model] + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" + ) + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_embedding_models: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") + model = litellm.azure_embedding_models[model] + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + else: + # if model is not in model_prices_and_context_window.json. Raise an exception-let users know + error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. 
Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" + raise litellm.exceptions.NotFoundError( # type: ignore + message=error_str, + model=model, + llm_provider="", + ) + + # Extract the number of billion parameters from the model name # only used for together_computer LLMs def get_model_params_and_category(model_name) -> str: diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py new file mode 100644 index 000000000..7b911895d --- /dev/null +++ b/litellm/litellm_core_utils/core_helpers.py @@ -0,0 +1,41 @@ +# What is this? +## Helper utilities for the model response objects + + +def map_finish_reason( + finish_reason: str, +): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' + # anthropic mapping + if finish_reason == "stop_sequence": + return "stop" + # cohere mapping - https://docs.cohere.com/reference/generate + elif finish_reason == "COMPLETE": + return "stop" + elif finish_reason == "MAX_TOKENS": # cohere + vertex ai + return "length" + elif finish_reason == "ERROR_TOXIC": + return "content_filter" + elif ( + finish_reason == "ERROR" + ): # openai currently doesn't support an 'error' finish reason + return "stop" + # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream + elif finish_reason == "eos_token" or finish_reason == "stop_sequence": + return "stop" + elif ( + finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP" + ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] + return "stop" + elif finish_reason == "SAFETY": # vertex ai + return "content_filter" + elif finish_reason == "STOP": # vertex ai + return "stop" + elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic + return "stop" + elif finish_reason == "max_tokens": # anthropic + return "length" + elif finish_reason == "tool_use": # anthropic + return "tool_calls" + elif finish_reason == "content_filtered": + return "content_filter" + return finish_reason diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py new file mode 100644 index 000000000..ab9874fdc --- /dev/null +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -0,0 +1,3215 @@ +# What is this? 
+## Common Utility file for Logging handler +# Logging function -> log the exact model details + what's being sent | Non-Blocking +from litellm.types.utils import CallTypes +from typing import Optional +import datetime +from litellm import ( + verbose_logger, + json_logs, + log_raw_request_response, + turn_off_message_logging, +) +import traceback +import litellm +import copy +from litellm.integrations.custom_logger import CustomLogger +import json +import time +from litellm.utils import ( + redact_message_input_output_from_logging, + _get_base_model_from_metadata, + supabaseClient, + liteDebuggerClient, + promptLayerLogger, + weightsBiasesLogger, + langsmithLogger, + logfireLogger, + capture_exception, + add_breadcrumb, + lunaryLogger, + prometheusLogger, + LogfireLevel, + print_verbose, + customLogger, + prompt_token_calculator, +) +from litellm.types.utils import ( + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, +) + + +class Logging: + global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app + custom_pricing: bool = False + stream_options = None + + def __init__( + self, + model, + messages, + stream, + call_type, + start_time, + litellm_call_id, + function_id, + dynamic_success_callbacks=None, + dynamic_failure_callbacks=None, + dynamic_async_success_callbacks=None, + langfuse_public_key=None, + langfuse_secret=None, + ): + if call_type not in [item.value for item in CallTypes]: + allowed_values = ", ".join([item.value for item in CallTypes]) + raise ValueError( + f"Invalid call_type {call_type}. Allowed values: {allowed_values}" + ) + if messages is not None: + if isinstance(messages, str): + messages = [ + {"role": "user", "content": messages} + ] # convert text completion input to the chat completion format + elif ( + isinstance(messages, list) + and len(messages) > 0 + and isinstance(messages[0], str) + ): + new_messages = [] + for m in messages: + new_messages.append({"role": "user", "content": m}) + messages = new_messages + self.model = model + self.messages = messages + self.stream = stream + self.start_time = start_time # log the call start time + self.call_type = call_type + self.litellm_call_id = litellm_call_id + self.function_id = function_id + self.streaming_chunks = [] # for generating complete stream response + self.sync_streaming_chunks = [] # for generating complete stream response + self.model_call_details = {} + self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call + self.dynamic_failure_callbacks = dynamic_failure_callbacks + self.dynamic_success_callbacks = ( + dynamic_success_callbacks # callbacks set for just that call + ) + self.dynamic_async_success_callbacks = ( + dynamic_async_success_callbacks # callbacks set for just that call + ) + ## DYNAMIC LANGFUSE KEYS ## + self.langfuse_public_key = langfuse_public_key + self.langfuse_secret = langfuse_secret + ## TIME TO FIRST TOKEN LOGGING ## + self.completion_start_time: Optional[datetime.datetime] = None + + def update_environment_variables( + self, model, user, optional_params, litellm_params, **additional_params + ): + self.optional_params = optional_params + self.model = model + self.user = user + self.litellm_params = litellm_params + self.logger_fn = litellm_params.get("logger_fn", None) + verbose_logger.debug(f"self.optional_params: {self.optional_params}") + + 
self.model_call_details = { + "model": self.model, + "messages": self.messages, + "optional_params": self.optional_params, + "litellm_params": self.litellm_params, + "start_time": self.start_time, + "stream": self.stream, + "user": user, + "call_type": str(self.call_type), + "litellm_call_id": self.litellm_call_id, + "completion_start_time": self.completion_start_time, + **self.optional_params, + **additional_params, + } + + ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation + if "stream_options" in additional_params: + self.stream_options = additional_params["stream_options"] + ## check if custom pricing set ## + if ( + litellm_params.get("input_cost_per_token") is not None + or litellm_params.get("input_cost_per_second") is not None + or litellm_params.get("output_cost_per_token") is not None + or litellm_params.get("output_cost_per_second") is not None + ): + self.custom_pricing = True + + def _pre_call(self, input, api_key, model=None, additional_args={}): + """ + Common helper function across the sync + async pre-call function + """ + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "pre_api_call" + if ( + model + ): # if model name was changes pre-call, overwrite the initial model call name with the new one + self.model_call_details["model"] = model + + def pre_call(self, input, api_key, model=None, additional_args={}): + # Log the exact input to the LLM API + litellm.error_logs["PRE_CALL"] = locals() + try: + self._pre_call( + input=input, + api_key=api_key, + model=model, + additional_args=additional_args, + ) + + # User Logging -> if you pass in a custom logging function + headers = additional_args.get("headers", {}) + if headers is None: + headers = {} + data = additional_args.get("complete_input_dict", {}) + api_base = additional_args.get("api_base", "") + self.model_call_details["litellm_params"]["api_base"] = str( + api_base + ) # used for alerting + masked_headers = { + k: ( + (v[:-44] + "*" * 44) + if (isinstance(v, str) and len(v) > 44) + else "*****" + ) + for k, v in headers.items() + } + formatted_headers = " ".join( + [f"-H '{k}: {v}'" for k, v in masked_headers.items()] + ) + + verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") + + curl_command = "\n\nPOST Request Sent from LiteLLM:\n" + curl_command += "curl -X POST \\\n" + curl_command += f"{api_base} \\\n" + curl_command += ( + f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" + ) + curl_command += f"-d '{str(data)}'\n" + if additional_args.get("request_str", None) is not None: + # print the sagemaker / bedrock client request + curl_command = "\nRequest Sent from LiteLLM:\n" + curl_command += additional_args.get("request_str", None) + elif api_base == "": + curl_command = self.model_call_details + + if json_logs: + verbose_logger.debug( + "POST Request Sent from LiteLLM", + extra={"api_base": {api_base}, **masked_headers}, + ) + else: + verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") + # log raw request to provider (like LangFuse) -- if opted in. 
+ if log_raw_request_response is True: + try: + # [Non-blocking Extra Debug Information in metadata] + _litellm_params = self.model_call_details.get("litellm_params", {}) + _metadata = _litellm_params.get("metadata", {}) or {} + if ( + turn_off_message_logging is not None + and turn_off_message_logging is True + ): + _metadata["raw_request"] = ( + "redacted by litellm. \ + 'litellm.turn_off_message_logging=True'" + ) + else: + _metadata["raw_request"] = str(curl_command) + except Exception as e: + _metadata["raw_request"] = ( + "Unable to Log \ + raw request: {}".format( + str(e) + ) + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "supabase": + verbose_logger.debug("reaches supabase for logging!") + model = self.model_call_details["model"] + messages = self.model_call_details["input"] + verbose_logger.debug(f"supabaseClient: {supabaseClient}") + supabaseClient.input_log_event( + model=model, + messages=messages, + end_user=self.model_call_details.get("user", "default"), + litellm_call_id=self.litellm_params["litellm_call_id"], + print_verbose=print_verbose, + ) + elif callback == "sentry" and add_breadcrumb: + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details pre-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_pre_api_call( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + ) + elif callable(callback): # custom logger functions + customLogger.log_input_event( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "litellm.Logging.pre_call(): Exception occured - {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.error( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + # Log the exact result from the LLM API, for streaming - log the type of response received + litellm.error_logs["POST_CALL"] = locals() + if isinstance(original_response, dict): + 
original_response = json.dumps(original_response) + try: + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["original_response"] = original_response + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "post_api_call" + + verbose_logger.debug( + "RAW RESPONSE:\n{}\n\n".format( + self.model_call_details.get( + "original_response", self.model_call_details + ) + ), + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.debug( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + original_response = redact_message_input_output_from_logging( + litellm_logging_obj=self, result=original_response + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "sentry" and add_breadcrumb: + verbose_logger.debug("reaches sentry breadcrumbing") + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details post-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_post_api_call( + kwargs=self.model_call_details, + response_obj=None, + start_time=self.start_time, + end_time=None, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + def _success_handler_helper_fn( + self, result=None, start_time=None, end_time=None, cache_hit=None + ): + try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + if self.completion_start_time is None: + self.completion_start_time = end_time + self.model_call_details["completion_start_time"] = ( + self.completion_start_time + ) + self.model_call_details["log_event_type"] = "successful_api_call" + self.model_call_details["end_time"] = end_time + self.model_call_details["cache_hit"] = cache_hit + ## if model in model cost map - log the response cost + ## else set cost to None + verbose_logger.debug(f"Model={self.model};") + if ( + result is not None + and ( + isinstance(result, ModelResponse) + or isinstance(result, EmbeddingResponse) + or isinstance(result, ImageResponse) + or isinstance(result, TranscriptionResponse) + or isinstance(result, TextCompletionResponse) + ) + and self.stream != True + ): # handle streaming separately + 
self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=result, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + else: # streaming chunks + image gen. + self.model_call_details["response_cost"] = None + + if ( + litellm.max_budget + and self.stream == False + and result is not None + and "content" in result + ): + time_diff = (end_time - start_time).total_seconds() + float_diff = float(time_diff) + litellm._current_cost += litellm.completion_cost( + model=self.model, + prompt="", + completion=result["content"], + total_time=float_diff, + ) + + return start_time, end_time, result + except Exception as e: + raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") + + def success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}" + ) + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, + end_time=end_time, + result=result, + cache_hit=cache_hit, + ) + # print(f"original response in success handler: {self.model_call_details['original_response']}") + try: + verbose_logger.debug(f"success callbacks: {litellm.success_callback}") + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream and isinstance(result, ModelResponse): + if ( + result.choices[0].finish_reason is not None + ): # if it's the last chunk + self.sync_streaming_chunks.append(result) + # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.sync_streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.sync_streaming_chunks.append(result) + + if complete_streaming_response is not None: + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call streaming complete" + ) + self.model_call_details["complete_streaming_response"] = ( + complete_streaming_response + ) + self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=complete_streaming_response, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + if self.dynamic_success_callbacks is not None and isinstance( + self.dynamic_success_callbacks, list + ): + callbacks = self.dynamic_success_callbacks + ## keep the internal functions ## + for callback in litellm.success_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.success_callback + + result = 
redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + try: + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + print_verbose( + f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" + ) + liteDebuggerClient.log_event( + end_user=kwargs.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "promptlayer": + print_verbose("reaches promptlayer for logging!") + promptLayerLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "supabase": + print_verbose("reaches supabase for logging!") + kwargs = self.model_call_details + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches supabase for streaming logging!") + result = kwargs["complete_streaming_response"] + + model = kwargs["model"] + messages = kwargs["messages"] + optional_params = kwargs.get("optional_params", {}) + litellm_params = kwargs.get("litellm_params", {}) + supabaseClient.log_event( + model=model, + messages=messages, + end_user=optional_params.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=litellm_params.get( + "litellm_call_id", str(uuid.uuid4()) + ), + print_verbose=print_verbose, + ) + if callback == "wandb": + print_verbose("reaches wandb for logging!") + weightsBiasesLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langsmith": + print_verbose("reaches langsmith for logging!") + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose( + "reaches langsmith for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + langsmithLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "logfire": + global logfireLogger + verbose_logger.debug("reaches logfire for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches logfire for streaming logging!") + result = kwargs["complete_streaming_response"] + + logfireLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + level=LogfireLevel.INFO.value, + ) + + if callback == "lunary": + print_verbose("reaches lunary for logging!") + model = self.model + kwargs = self.model_call_details + + input = kwargs.get("messages", kwargs.get("input", None)) + + type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + result = kwargs["complete_streaming_response"] + + lunaryLogger.log_event( + type=type, + kwargs=kwargs, + event="end", + model=model, + input=input, + user_id=kwargs.get("user", None), + # user_props=self.model_call_details.get("user_props", None), + extra=kwargs.get("optional_params", {}), + response_obj=result, + start_time=start_time, + end_time=end_time, + run_id=self.litellm_call_id, + print_verbose=print_verbose, + ) + if callback == "helicone": + print_verbose("reaches helicone for logging!") + model = self.model + messages = self.model_call_details["input"] + heliconeLogger.log_success( + model=model, + messages=messages, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "datadog": + global dataDogLogger + verbose_logger.debug("reaches 
datadog for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches datadog for streaming logging!") + result = kwargs["complete_streaming_response"] + dataDogLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "prometheus": + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches prometheus for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "generic": + global genericAPILogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if genericAPILogger is None: + genericAPILogger = GenericAPILogger() + genericAPILogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "clickhouse": + global clickHouseLogger + verbose_logger.debug("reaches clickhouse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches clickhouse for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + if clickHouseLogger is None: + clickHouseLogger = ClickhouseLogger() + clickHouseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "greenscale": + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches greenscale for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + + greenscaleLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "cache" and litellm.cache is not None: + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + print_verbose("success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "complete_streaming_response" not in kwargs: + print_verbose( + f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" + ) + result = kwargs["complete_streaming_response"] + # only add to cache once we have a complete streaming response + litellm.cache.add_cache(result, **kwargs) + if callback == "athina": + deep_copy = {} + for k, v in self.model_call_details.items(): + deep_copy[k] = v + athinaLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "traceloop": + deep_copy = {} + for k, v in self.model_call_details.items(): + if k != "original_response": + deep_copy[k] = v + traceloopLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "s3": + global s3Logger + if s3Logger is None: + s3Logger = S3Logger() + if self.stream: + if "complete_streaming_response" in self.model_call_details: + print_verbose( + "S3Logger Logger: Got Stream Event - Completed Stream Response" + ) + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "S3Logger Logger: Got Stream Event - No complete stream response as yet" + ) + else: + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if ( + callback == "openmeter" + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", 
{}).get( + "atranscription", False + ) + == False + ): + global openMeterLogger + if openMeterLogger is None: + print_verbose("Instantiates openmeter client") + openMeterLogger = OpenMeterLogger() + if self.stream and complete_streaming_response is None: + openMeterLogger.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + openMeterLogger.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger class + if self.stream and complete_streaming_response is None: + callback.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + callback.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if ( + callable(callback) == True + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger functions + print_verbose( + f"success callbacks: Running Custom Callback Function" + ) + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + ) + + async def async_success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
+ """ + print_verbose("Logging Details LiteLLM-Async Success Call") + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit + ) + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream: + if result.choices[0].finish_reason is not None: # if it's the last chunk + self.streaming_chunks.append(result) + # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + print_verbose( + "Error occurred building stream chunk in success logging: {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.streaming_chunks.append(result) + if complete_streaming_response is not None: + print_verbose("Async success callbacks: Got a complete streaming response") + self.model_call_details["async_complete_streaming_response"] = ( + complete_streaming_response + ) + try: + if self.model_call_details.get("cache_hit", False) is True: + self.model_call_details["response_cost"] = 0.0 + else: + # check if base_model set on azure + base_model = _get_base_model_from_metadata( + model_call_details=self.model_call_details + ) + # base_model defaults to None if not set on model_info + self.model_call_details["response_cost"] = litellm.completion_cost( + completion_response=complete_streaming_response, + model=base_model, + ) + verbose_logger.debug( + f"Model={self.model}; cost={self.model_call_details['response_cost']}" + ) + except litellm.NotFoundError as e: + verbose_logger.error( + f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" + ) + self.model_call_details["response_cost"] = None + + if self.dynamic_async_success_callbacks is not None and isinstance( + self.dynamic_async_success_callbacks, list + ): + callbacks = self.dynamic_async_success_callbacks + ## keep the internal functions ## + for callback in litellm._async_success_callback: + callback_name = "" + if isinstance(callback, CustomLogger): + callback_name = callback.__class__.__name__ + if callable(callback): + callback_name = callback.__name__ + if "_PROXY_" in callback_name: + callbacks.append(callback) + else: + callbacks = litellm._async_success_callback + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + # check if callback can run for this request + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + try: + if kwargs.get("no-log", False) == True: + print_verbose("no-log request, skipping logging") + continue + if callback == "cache" and litellm.cache is not None: + # set_cache once complete streaming response is built + print_verbose("async success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "async_complete_streaming_response" not in kwargs: + print_verbose( + f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" + ) + result = kwargs["async_complete_streaming_response"] + # only add to cache once we have a complete streaming response + if litellm.cache is not None and not isinstance( + litellm.cache.cache, S3Cache + ): + await litellm.cache.async_add_cache(result, **kwargs) + else: + litellm.cache.add_cache(result, **kwargs) + if callback == "openmeter": + global openMeterLogger + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if isinstance(callback, CustomLogger): # custom logger class + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if callable(callback): # custom logger functions + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + else: + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if callback == "dynamodb": + global dynamoLogger + if dynamoLogger is None: + dynamoLogger = DyanmoDBLogger() + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + print_verbose( + "DynamoDB Logger: Got Stream Event - Completed Stream Response" + ) + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "DynamoDB Logger: Got Stream Event - No complete stream response as yet" + ) + else: + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + except Exception as e: + verbose_logger.error( + f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" + ) + pass + + def _failure_handler_helper_fn( + self, exception, traceback_exception, start_time=None, end_time=None + ): + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + + # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions + if not hasattr(self, "model_call_details"): + self.model_call_details = {} + + self.model_call_details["log_event_type"] = "failed_api_call" + self.model_call_details["exception"] = exception + self.model_call_details["traceback_exception"] = traceback_exception + self.model_call_details["end_time"] = end_time + self.model_call_details.setdefault("original_response", None) + return start_time, end_time + + def failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" + ) + try: + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + callbacks = [] # init this to empty incase it's not created + + if self.dynamic_failure_callbacks is not None and isinstance( + self.dynamic_failure_callbacks, list + ): + callbacks = self.dynamic_failure_callbacks + ## keep the internal functions ## + for callback in litellm.failure_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.failure_callback + + result = None # result sent to all loggers, init this to None incase it's not created + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + for callback in callbacks: + try: + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + result = { + "model": self.model, + "created": time.time(), + "error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + self.model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + liteDebuggerClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "lunary": + print_verbose("reaches lunary for logging error!") + + model = self.model + + input = self.model_call_details["input"] + + _type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + lunaryLogger.log_event( + type=_type, + event="error", + user_id=self.model_call_details.get("user", "default"), + model=model, + input=input, + error=traceback_exception, + run_id=self.litellm_call_id, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "sentry": + print_verbose("sending exception to sentry") + if capture_exception: + capture_exception(exception) + else: + print_verbose( + f"capture exception not initialized: {capture_exception}" + ) + elif callback == "supabase": + print_verbose("reaches supabase for logging!") + print_verbose(f"supabaseClient: {supabaseClient}") + result = { + "model": model, + "created": time.time(), + 
"error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + supabaseClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.model_call_details["litellm_call_id"], + print_verbose=print_verbose, + ) + if callable(callback): # custom logger functions + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + ): # custom logger class + callback.log_failure_event( + start_time=start_time, + end_time=end_time, + response_obj=result, + kwargs=self.model_call_details, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for logging failure") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "traceloop": + traceloopLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "prometheus": + global prometheusLogger + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = str(exception) + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + + if callback == "logfire": + verbose_logger.debug("reaches logfire for failure logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = exception + + logfireLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + level=LogfireLevel.ERROR.value, + print_verbose=print_verbose, + ) + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with 
integrations {str(e)}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + async def async_failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. + """ + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + result = None # result sent to all loggers, init this to None incase it's not created + for callback in litellm._async_failure_callback: + try: + if isinstance(callback, CustomLogger): # custom logger class + await callback.async_log_failure_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) # type: ignore + if callable(callback): # custom logger functions + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success \ + logging {}\n{}\nCallback={}".format( + str(e), traceback.format_exc(), callback + ) + ) + + +# # class Logging: +# global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger + +# custom_pricing: bool = False +# stream_options = None + +# def __init__( +# self, +# model, +# messages, +# stream, +# call_type, +# start_time, +# litellm_call_id, +# function_id, +# dynamic_success_callbacks=None, +# dynamic_failure_callbacks=None, +# dynamic_async_success_callbacks=None, +# langfuse_public_key=None, +# langfuse_secret=None, +# ): +# if call_type not in [item.value for item in CallTypes]: +# allowed_values = ", ".join([item.value for item in CallTypes]) +# raise ValueError( +# f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}" +# ) +# if messages is not None: +# if isinstance(messages, str): +# messages = [ +# {"role": "user", "content": messages} +# ] # convert text completion input to the chat completion format +# elif ( +# isinstance(messages, list) +# and len(messages) > 0 +# and isinstance(messages[0], str) +# ): +# new_messages = [] +# for m in messages: +# new_messages.append({"role": "user", "content": m}) +# messages = new_messages +# self.model = model +# self.messages = messages +# self.stream = stream +# self.start_time = start_time # log the call start time +# self.call_type = call_type +# self.litellm_call_id = litellm_call_id +# self.function_id = function_id +# self.streaming_chunks = [] # for generating complete stream response +# self.sync_streaming_chunks = [] # for generating complete stream response +# self.model_call_details = {} +# self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call +# self.dynamic_failure_callbacks = dynamic_failure_callbacks +# self.dynamic_success_callbacks = ( +# dynamic_success_callbacks # callbacks set for just that call +# ) +# self.dynamic_async_success_callbacks = ( +# dynamic_async_success_callbacks # callbacks set for just that call +# ) +# ## DYNAMIC LANGFUSE KEYS ## +# self.langfuse_public_key = langfuse_public_key +# self.langfuse_secret = langfuse_secret +# ## TIME TO FIRST TOKEN LOGGING ## +# self.completion_start_time: Optional[datetime.datetime] = None + +# def update_environment_variables( +# self, model, user, optional_params, litellm_params, **additional_params +# ): +# self.optional_params = optional_params +# self.model = model +# self.user = user +# self.litellm_params = litellm_params +# self.logger_fn = litellm_params.get("logger_fn", None) +# print_verbose(f"self.optional_params: {self.optional_params}") + +# self.model_call_details = { +# "model": self.model, +# "messages": self.messages, +# "optional_params": self.optional_params, +# "litellm_params": self.litellm_params, +# "start_time": self.start_time, +# "stream": self.stream, +# "user": user, +# "call_type": str(self.call_type), +# "litellm_call_id": self.litellm_call_id, +# "completion_start_time": self.completion_start_time, +# **self.optional_params, +# **additional_params, +# } + +# ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation +# if "stream_options" in additional_params: +# self.stream_options = additional_params["stream_options"] +# ## check if custom pricing set ## +# if ( +# litellm_params.get("input_cost_per_token") is not None +# or litellm_params.get("input_cost_per_second") is not None +# or litellm_params.get("output_cost_per_token") is not None +# or litellm_params.get("output_cost_per_second") is not None +# ): +# self.custom_pricing = True + +# def _pre_call(self, input, api_key, model=None, additional_args={}): +# """ +# Common helper function across the sync + async pre-call function +# """ +# # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") +# self.model_call_details["input"] = input +# self.model_call_details["api_key"] = api_key +# self.model_call_details["additional_args"] = additional_args +# self.model_call_details["log_event_type"] = "pre_api_call" +# if ( +# model +# ): # if model name was changes pre-call, overwrite the initial model call name with the new one +# self.model_call_details["model"] = model + +# def pre_call(self, input, api_key, model=None, additional_args={}): +# # Log the exact input to the LLM API +# 
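# The constructor logic above (kept here as a commented-out copy) converts
# text-completion style input into the chat format before it is logged. The same
# normalization as a standalone sketch; the helper name is illustrative.
from typing import List, Union


def normalize_messages(messages: Union[str, List[str], List[dict], None]):
    if messages is None:
        return None
    if isinstance(messages, str):
        return [{"role": "user", "content": messages}]
    if isinstance(messages, list) and len(messages) > 0 and isinstance(messages[0], str):
        return [{"role": "user", "content": m} for m in messages]
    return messages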
litellm.error_logs["PRE_CALL"] = locals() +# try: +# self._pre_call( +# input=input, +# api_key=api_key, +# model=model, +# additional_args=additional_args, +# ) + +# # User Logging -> if you pass in a custom logging function +# headers = additional_args.get("headers", {}) +# if headers is None: +# headers = {} +# data = additional_args.get("complete_input_dict", {}) +# api_base = additional_args.get("api_base", "") +# self.model_call_details["litellm_params"]["api_base"] = str( +# api_base +# ) # used for alerting +# masked_headers = { +# k: ( +# (v[:-44] + "*" * 44) +# if (isinstance(v, str) and len(v) > 44) +# else "*****" +# ) +# for k, v in headers.items() +# } +# formatted_headers = " ".join( +# [f"-H '{k}: {v}'" for k, v in masked_headers.items()] +# ) + +# verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") + +# curl_command = "\n\nPOST Request Sent from LiteLLM:\n" +# curl_command += "curl -X POST \\\n" +# curl_command += f"{api_base} \\\n" +# curl_command += ( +# f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" +# ) +# curl_command += f"-d '{str(data)}'\n" +# if additional_args.get("request_str", None) is not None: +# # print the sagemaker / bedrock client request +# curl_command = "\nRequest Sent from LiteLLM:\n" +# curl_command += additional_args.get("request_str", None) +# elif api_base == "": +# curl_command = self.model_call_details + +# # only print verbose if verbose logger is not set +# if verbose_logger.level == 0: +# # this means verbose logger was not switched on - user is in litellm.set_verbose=True +# print_verbose(f"\033[92m{curl_command}\033[0m\n") + +# if litellm.json_logs: +# verbose_logger.debug( +# "POST Request Sent from LiteLLM", +# extra={"api_base": {api_base}, **masked_headers}, +# ) +# else: +# verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") +# # log raw request to provider (like LangFuse) -- if opted in. +# if litellm.log_raw_request_response is True: +# try: +# # [Non-blocking Extra Debug Information in metadata] +# _litellm_params = self.model_call_details.get("litellm_params", {}) +# _metadata = _litellm_params.get("metadata", {}) or {} +# if ( +# litellm.turn_off_message_logging is not None +# and litellm.turn_off_message_logging is True +# ): +# _metadata["raw_request"] = ( +# "redacted by litellm. 
\ +# 'litellm.turn_off_message_logging=True'" +# ) +# else: +# _metadata["raw_request"] = str(curl_command) +# except Exception as e: +# _metadata["raw_request"] = ( +# "Unable to Log \ +# raw request: {}".format( +# str(e) +# ) +# ) +# if self.logger_fn and callable(self.logger_fn): +# try: +# self.logger_fn( +# self.model_call_details +# ) # Expectation: any logger function passed in by the user should accept a dict object +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made +# callbacks = litellm.input_callback + self.dynamic_input_callbacks +# for callback in callbacks: +# try: +# if callback == "supabase": +# print_verbose("reaches supabase for logging!") +# model = self.model_call_details["model"] +# messages = self.model_call_details["input"] +# print_verbose(f"supabaseClient: {supabaseClient}") +# supabaseClient.input_log_event( +# model=model, +# messages=messages, +# end_user=self.model_call_details.get("user", "default"), +# litellm_call_id=self.litellm_params["litellm_call_id"], +# print_verbose=print_verbose, +# ) +# elif callback == "sentry" and add_breadcrumb: +# try: +# details_to_log = copy.deepcopy(self.model_call_details) +# except: +# details_to_log = self.model_call_details +# if litellm.turn_off_message_logging: +# # make a copy of the _model_Call_details and log it +# details_to_log.pop("messages", None) +# details_to_log.pop("input", None) +# details_to_log.pop("prompt", None) + +# add_breadcrumb( +# category="litellm.llm_call", +# message=f"Model Call Details pre-call: {details_to_log}", +# level="info", +# ) +# elif isinstance(callback, CustomLogger): # custom logger class +# callback.log_pre_api_call( +# model=self.model, +# messages=self.messages, +# kwargs=self.model_call_details, +# ) +# elif callable(callback): # custom logger functions +# customLogger.log_input_event( +# model=self.model, +# messages=self.messages, +# kwargs=self.model_call_details, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# except Exception as e: +# verbose_logger.error( +# "litellm.Logging.pre_call(): Exception occured - {}".format( +# str(e) +# ) +# ) +# verbose_logger.debug( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) + +# def post_call( +# self, original_response, input=None, api_key=None, additional_args={} +# ): +# # Log the exact result from the LLM API, for streaming - log the type of response received +# litellm.error_logs["POST_CALL"] = locals() +# if isinstance(original_response, dict): +# original_response = json.dumps(original_response) +# try: +# self.model_call_details["input"] = input +# self.model_call_details["api_key"] = api_key +# self.model_call_details["original_response"] = original_response +# self.model_call_details["additional_args"] = 
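# The raw-request block above is gated on two module-level switches that are
# referenced directly in the code above; enabling them looks like this.
import litellm

litellm.log_raw_request_response = True  # attach the reconstructed raw request to logger metadata
litellm.turn_off_message_logging = True  # redact message content; raw_request becomes a redaction notice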
additional_args +# self.model_call_details["log_event_type"] = "post_api_call" +# # User Logging -> if you pass in a custom logging function +# print_verbose( +# f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n", +# log_level="DEBUG", +# ) +# if self.logger_fn and callable(self.logger_fn): +# try: +# self.logger_fn( +# self.model_call_details +# ) # Expectation: any logger function passed in by the user should accept a dict object +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# original_response = redact_message_input_output_from_logging( +# litellm_logging_obj=self, result=original_response +# ) +# # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + +# callbacks = litellm.input_callback + self.dynamic_input_callbacks +# for callback in callbacks: +# try: +# if callback == "lite_debugger": +# print_verbose("reaches litedebugger for post-call logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# liteDebuggerClient.post_call_log_event( +# original_response=original_response, +# litellm_call_id=self.litellm_params["litellm_call_id"], +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# elif callback == "sentry" and add_breadcrumb: +# print_verbose("reaches sentry breadcrumbing") +# try: +# details_to_log = copy.deepcopy(self.model_call_details) +# except: +# details_to_log = self.model_call_details +# if litellm.turn_off_message_logging: +# # make a copy of the _model_Call_details and log it +# details_to_log.pop("messages", None) +# details_to_log.pop("input", None) +# details_to_log.pop("prompt", None) + +# add_breadcrumb( +# category="litellm.llm_call", +# message=f"Model Call Details post-call: {details_to_log}", +# level="info", +# ) +# elif isinstance(callback, CustomLogger): # custom logger class +# callback.log_post_api_call( +# kwargs=self.model_call_details, +# response_obj=None, +# start_time=self.start_time, +# end_time=None, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# pass + +# def _success_handler_helper_fn( +# self, result=None, start_time=None, end_time=None, cache_hit=None +# ): +# try: +# if start_time is None: +# start_time = self.start_time +# if end_time is None: +# end_time = datetime.datetime.now() +# if self.completion_start_time is None: +# self.completion_start_time = end_time +# self.model_call_details["completion_start_time"] = ( +# self.completion_start_time +# ) +# self.model_call_details["log_event_type"] = "successful_api_call" +# self.model_call_details["end_time"] = end_time +# self.model_call_details["cache_hit"] = cache_hit +# ## if model in model cost map - log the response cost +# ## else set cost to None +# verbose_logger.debug(f"Model={self.model};") +# if ( +# result is not None +# and ( +# isinstance(result, ModelResponse) +# or isinstance(result, EmbeddingResponse) +# or isinstance(result, ImageResponse) +# or 
isinstance(result, TranscriptionResponse) +# or isinstance(result, TextCompletionResponse) +# ) +# and self.stream != True +# ): # handle streaming separately +# self.model_call_details["response_cost"] = ( +# litellm.response_cost_calculator( +# response_object=result, +# model=self.model, +# cache_hit=self.model_call_details.get("cache_hit", False), +# custom_llm_provider=self.model_call_details.get( +# "custom_llm_provider", None +# ), +# base_model=_get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ), +# call_type=self.call_type, +# optional_params=self.optional_params, +# ) +# ) +# else: # streaming chunks + image gen. +# self.model_call_details["response_cost"] = None + +# if ( +# litellm.max_budget +# and self.stream == False +# and result is not None +# and "content" in result +# ): +# time_diff = (end_time - start_time).total_seconds() +# float_diff = float(time_diff) +# litellm._current_cost += litellm.completion_cost( +# model=self.model, +# prompt="", +# completion=result["content"], +# total_time=float_diff, +# ) + +# return start_time, end_time, result +# except Exception as e: +# raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") + +# def success_handler( +# self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs +# ): +# print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}") +# start_time, end_time, result = self._success_handler_helper_fn( +# start_time=start_time, +# end_time=end_time, +# result=result, +# cache_hit=cache_hit, +# ) +# # print(f"original response in success handler: {self.model_call_details['original_response']}") +# try: +# print_verbose(f"success callbacks: {litellm.success_callback}") +# ## BUILD COMPLETE STREAMED RESPONSE +# complete_streaming_response = None +# if self.stream and isinstance(result, ModelResponse): +# if ( +# result.choices[0].finish_reason is not None +# ): # if it's the last chunk +# self.sync_streaming_chunks.append(result) +# # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") +# try: +# complete_streaming_response = litellm.stream_chunk_builder( +# self.sync_streaming_chunks, +# messages=self.model_call_details.get("messages", None), +# start_time=start_time, +# end_time=end_time, +# ) +# except Exception as e: +# print_verbose( +# "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# complete_streaming_response = None +# else: +# self.sync_streaming_chunks.append(result) + +# if complete_streaming_response is not None: +# print_verbose( +# f"Logging Details LiteLLM-Success Call streaming complete" +# ) +# self.model_call_details["complete_streaming_response"] = ( +# complete_streaming_response +# ) +# self.model_call_details["response_cost"] = ( +# litellm.response_cost_calculator( +# response_object=complete_streaming_response, +# model=self.model, +# cache_hit=self.model_call_details.get("cache_hit", False), +# custom_llm_provider=self.model_call_details.get( +# "custom_llm_provider", None +# ), +# base_model=_get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ), +# call_type=self.call_type, +# optional_params=self.optional_params, +# ) +# ) +# if self.dynamic_success_callbacks is not None and isinstance( +# self.dynamic_success_callbacks, list +# ): +# callbacks = self.dynamic_success_callbacks +# ## keep the internal functions ## +# for 
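# The success-handler logic above buffers stream chunks, rebuilds a complete
# response once the final chunk arrives, and prices it. A caller-side sketch of
# the same two steps using the public litellm helpers; the handler itself calls
# response_cost_calculator with extra context (base_model, call_type, cache_hit),
# so this is an approximation, and the model name is just an example.
import litellm

messages = [{"role": "user", "content": "hello"}]
chunks = []
for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
    chunks.append(chunk)

complete_response = litellm.stream_chunk_builder(chunks, messages=messages)
approx_cost = litellm.completion_cost(completion_response=complete_response)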
callback in litellm.success_callback: +# if ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# callbacks.append(callback) +# else: +# callbacks = litellm.success_callback + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) + +# for callback in callbacks: +# try: +# litellm_params = self.model_call_details.get("litellm_params", {}) +# if litellm_params.get("no-log", False) == True: +# # proxy cost tracking cal backs should run +# if not ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# print_verbose("no-log request, skipping logging") +# continue +# if callback == "lite_debugger": +# print_verbose("reaches lite_debugger for logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# print_verbose( +# f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" +# ) +# liteDebuggerClient.log_event( +# end_user=kwargs.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=self.litellm_call_id, +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# if callback == "promptlayer": +# print_verbose("reaches promptlayer for logging!") +# promptLayerLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "supabase": +# print_verbose("reaches supabase for logging!") +# kwargs = self.model_call_details + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose("reaches supabase for streaming logging!") +# result = kwargs["complete_streaming_response"] + +# model = kwargs["model"] +# messages = kwargs["messages"] +# optional_params = kwargs.get("optional_params", {}) +# litellm_params = kwargs.get("litellm_params", {}) +# supabaseClient.log_event( +# model=model, +# messages=messages, +# end_user=optional_params.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=litellm_params.get( +# "litellm_call_id", str(uuid.uuid4()) +# ), +# print_verbose=print_verbose, +# ) +# if callback == "wandb": +# print_verbose("reaches wandb for logging!") +# weightsBiasesLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "langsmith": +# print_verbose("reaches langsmith for logging!") +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose( +# "reaches langsmith for streaming logging!" 
+# ) +# result = kwargs["complete_streaming_response"] +# langsmithLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "logfire": +# global logfireLogger +# verbose_logger.debug("reaches logfire for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose("reaches logfire for streaming logging!") +# result = kwargs["complete_streaming_response"] + +# logfireLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# level=LogfireLevel.INFO.value, +# ) + +# if callback == "lunary": +# print_verbose("reaches lunary for logging!") +# model = self.model +# kwargs = self.model_call_details + +# input = kwargs.get("messages", kwargs.get("input", None)) + +# type = ( +# "embed" +# if self.call_type == CallTypes.embedding.value +# else "llm" +# ) + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# result = kwargs["complete_streaming_response"] + +# lunaryLogger.log_event( +# type=type, +# kwargs=kwargs, +# event="end", +# model=model, +# input=input, +# user_id=kwargs.get("user", None), +# # user_props=self.model_call_details.get("user_props", None), +# extra=kwargs.get("optional_params", {}), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# run_id=self.litellm_call_id, +# print_verbose=print_verbose, +# ) +# if callback == "helicone": +# print_verbose("reaches helicone for logging!") +# model = self.model +# messages = self.model_call_details["input"] +# heliconeLogger.log_success( +# model=model, +# messages=messages, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "langfuse": +# global langFuseLogger +# verbose_logger.debug("reaches langfuse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches langfuse for streaming logging!") +# result = kwargs["complete_streaming_response"] +# if langFuseLogger is None or ( +# ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# and ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# ): +# langFuseLogger = LangFuseLogger( +# langfuse_public_key=self.langfuse_public_key, +# langfuse_secret=self.langfuse_secret, +# ) +# langFuseLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), 
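# The langfuse branch above re-instantiates LangFuseLogger whenever a per-call
# public key differs from the one the cached logger was built with (the original
# condition repeats the same public-key check twice). The guard, reduced to a
# single check, as a sketch; the function name is illustrative.
def needs_new_langfuse_logger(current_logger, dynamic_public_key) -> bool:
    return current_logger is None or (
        dynamic_public_key is not None
        and dynamic_public_key != current_logger.public_key
    )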
+# print_verbose=print_verbose, +# ) +# if callback == "datadog": +# global dataDogLogger +# verbose_logger.debug("reaches datadog for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches datadog for streaming logging!") +# result = kwargs["complete_streaming_response"] +# dataDogLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "prometheus": +# global prometheusLogger +# verbose_logger.debug("reaches prometheus for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches prometheus for streaming logging!" +# ) +# result = kwargs["complete_streaming_response"] +# prometheusLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "generic": +# global genericAPILogger +# verbose_logger.debug("reaches langfuse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches langfuse for streaming logging!") +# result = kwargs["complete_streaming_response"] +# if genericAPILogger is None: +# genericAPILogger = GenericAPILogger() +# genericAPILogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "clickhouse": +# global clickHouseLogger +# verbose_logger.debug("reaches clickhouse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches clickhouse for streaming logging!" 
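# Several integrations above (langfuse, datadog, prometheus, generic, clickhouse)
# build a shallow, filtered copy of model_call_details instead of deep-copying it,
# because "original_response" may be a coroutine and copy.deepcopy would raise.
# The repeated loop as a one-line helper; the name is illustrative.
def loggable_kwargs(model_call_details: dict) -> dict:
    return {k: v for k, v in model_call_details.items() if k != "original_response"}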
+# ) +# result = kwargs["complete_streaming_response"] +# if clickHouseLogger is None: +# clickHouseLogger = ClickhouseLogger() +# clickHouseLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "greenscale": +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches greenscale for streaming logging!" +# ) +# result = kwargs["complete_streaming_response"] + +# greenscaleLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "cache" and litellm.cache is not None: +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# print_verbose("success_callback: reaches cache for logging!") +# kwargs = self.model_call_details +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# print_verbose( +# f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" +# ) +# pass +# else: +# print_verbose( +# "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" +# ) +# result = kwargs["complete_streaming_response"] +# # only add to cache once we have a complete streaming response +# litellm.cache.add_cache(result, **kwargs) +# if callback == "athina": +# deep_copy = {} +# for k, v in self.model_call_details.items(): +# deep_copy[k] = v +# athinaLogger.log_event( +# kwargs=deep_copy, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "traceloop": +# deep_copy = {} +# for k, v in self.model_call_details.items(): +# if k != "original_response": +# deep_copy[k] = v +# traceloopLogger.log_event( +# kwargs=deep_copy, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "s3": +# global s3Logger +# if s3Logger is None: +# s3Logger = S3Logger() +# if self.stream: +# if "complete_streaming_response" in self.model_call_details: +# print_verbose( +# "S3Logger Logger: Got Stream Event - Completed Stream Response" +# ) +# s3Logger.log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# else: +# print_verbose( +# "S3Logger Logger: Got Stream Event - No complete stream response as yet" +# ) +# else: +# s3Logger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if ( +# callback == "openmeter" +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and 
self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): +# global openMeterLogger +# if openMeterLogger is None: +# print_verbose("Instantiates openmeter client") +# openMeterLogger = OpenMeterLogger() +# if self.stream and complete_streaming_response is None: +# openMeterLogger.log_stream_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# if self.stream and complete_streaming_response: +# self.model_call_details["complete_response"] = ( +# self.model_call_details.get( +# "complete_streaming_response", {} +# ) +# ) +# result = self.model_call_details["complete_response"] +# openMeterLogger.log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) + +# if ( +# isinstance(callback, CustomLogger) +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): # custom logger class +# if self.stream and complete_streaming_response is None: +# callback.log_stream_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# if self.stream and complete_streaming_response: +# self.model_call_details["complete_response"] = ( +# self.model_call_details.get( +# "complete_streaming_response", {} +# ) +# ) +# result = self.model_call_details["complete_response"] +# callback.log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if ( +# callable(callback) == True +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): # custom logger functions +# print_verbose( +# f"success callbacks: Running Custom Callback Function" +# ) +# customLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) + +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# pass + +# async def async_success_handler( +# self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs +# ): +# """ +# Implementing async 
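# The long guards above skip the sync CustomLogger / callable handlers whenever
# the request came through an async entry point (acompletion, aembedding,
# aimage_generation, atranscription); those requests are handled by
# async_success_handler instead. The same check condensed into a helper; the
# name is illustrative.
def is_sync_call(litellm_params: dict) -> bool:
    return not any(
        litellm_params.get(flag, False)
        for flag in ("acompletion", "aembedding", "aimage_generation", "atranscription")
    )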
callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. +# """ +# print_verbose("Logging Details LiteLLM-Async Success Call") +# start_time, end_time, result = self._success_handler_helper_fn( +# start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit +# ) +# ## BUILD COMPLETE STREAMED RESPONSE +# complete_streaming_response = None +# if self.stream: +# if result.choices[0].finish_reason is not None: # if it's the last chunk +# self.streaming_chunks.append(result) +# # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") +# try: +# complete_streaming_response = litellm.stream_chunk_builder( +# self.streaming_chunks, +# messages=self.model_call_details.get("messages", None), +# start_time=start_time, +# end_time=end_time, +# ) +# except Exception as e: +# print_verbose( +# "Error occurred building stream chunk in success logging: {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# complete_streaming_response = None +# else: +# self.streaming_chunks.append(result) +# if complete_streaming_response is not None: +# print_verbose("Async success callbacks: Got a complete streaming response") +# self.model_call_details["async_complete_streaming_response"] = ( +# complete_streaming_response +# ) +# try: +# if self.model_call_details.get("cache_hit", False) is True: +# self.model_call_details["response_cost"] = 0.0 +# else: +# # check if base_model set on azure +# base_model = _get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ) +# # base_model defaults to None if not set on model_info +# self.model_call_details["response_cost"] = litellm.completion_cost( +# completion_response=complete_streaming_response, +# model=base_model, +# ) +# verbose_logger.debug( +# f"Model={self.model}; cost={self.model_call_details['response_cost']}" +# ) +# except litellm.NotFoundError as e: +# verbose_logger.error( +# f"Model={self.model} not found in completion cost map. 
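# The async success path above prices a completed stream as: zero on cache hits,
# otherwise litellm.completion_cost() on the rebuilt response (optionally against
# an Azure base_model pulled from metadata), and None when the model is missing
# from the cost map. A condensed sketch; the function name is illustrative.
import litellm


def price_streamed_response(complete_response, cache_hit: bool, base_model=None):
    if cache_hit:
        return 0.0
    try:
        return litellm.completion_cost(
            completion_response=complete_response, model=base_model
        )
    except litellm.NotFoundError:
        return None  # model not found in the completion cost map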
Setting 'response_cost' to None" +# ) +# self.model_call_details["response_cost"] = None + +# if self.dynamic_async_success_callbacks is not None and isinstance( +# self.dynamic_async_success_callbacks, list +# ): +# callbacks = self.dynamic_async_success_callbacks +# ## keep the internal functions ## +# for callback in litellm._async_success_callback: +# callback_name = "" +# if isinstance(callback, CustomLogger): +# callback_name = callback.__class__.__name__ +# if callable(callback): +# callback_name = callback.__name__ +# if "_PROXY_" in callback_name: +# callbacks.append(callback) +# else: +# callbacks = litellm._async_success_callback + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) + +# for callback in callbacks: +# # check if callback can run for this request +# litellm_params = self.model_call_details.get("litellm_params", {}) +# if litellm_params.get("no-log", False) == True: +# # proxy cost tracking cal backs should run +# if not ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# print_verbose("no-log request, skipping logging") +# continue +# try: +# if kwargs.get("no-log", False) == True: +# print_verbose("no-log request, skipping logging") +# continue +# if callback == "cache" and litellm.cache is not None: +# # set_cache once complete streaming response is built +# print_verbose("async success_callback: reaches cache for logging!") +# kwargs = self.model_call_details +# if self.stream: +# if "async_complete_streaming_response" not in kwargs: +# print_verbose( +# f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n" +# ) +# pass +# else: +# print_verbose( +# "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. 
Adding to cache" +# ) +# result = kwargs["async_complete_streaming_response"] +# # only add to cache once we have a complete streaming response +# if litellm.cache is not None and not isinstance( +# litellm.cache.cache, S3Cache +# ): +# await litellm.cache.async_add_cache(result, **kwargs) +# else: +# litellm.cache.add_cache(result, **kwargs) +# if callback == "openmeter": +# global openMeterLogger +# if self.stream == True: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await openMeterLogger.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await openMeterLogger.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if isinstance(callback, CustomLogger): # custom logger class +# if self.stream == True: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await callback.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await callback.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if callable(callback): # custom logger functions +# if self.stream: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# else: +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# if callback == "dynamodb": +# global dynamoLogger +# if dynamoLogger is None: +# dynamoLogger = DyanmoDBLogger() +# if self.stream: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# print_verbose( +# "DynamoDB Logger: Got Stream Event - Completed Stream Response" +# ) +# await dynamoLogger._async_log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# else: +# print_verbose( +# "DynamoDB Logger: Got Stream Event - No complete stream response as yet" +# ) +# else: +# await dynamoLogger._async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# except Exception as e: +# verbose_logger.error( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success 
logging {traceback.format_exc()}" +# ) +# pass + +# def _failure_handler_helper_fn( +# self, exception, traceback_exception, start_time=None, end_time=None +# ): +# if start_time is None: +# start_time = self.start_time +# if end_time is None: +# end_time = datetime.datetime.now() + +# # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions +# if not hasattr(self, "model_call_details"): +# self.model_call_details = {} + +# self.model_call_details["log_event_type"] = "failed_api_call" +# self.model_call_details["exception"] = exception +# self.model_call_details["traceback_exception"] = traceback_exception +# self.model_call_details["end_time"] = end_time +# self.model_call_details.setdefault("original_response", None) +# return start_time, end_time + +# def failure_handler( +# self, exception, traceback_exception, start_time=None, end_time=None +# ): +# print_verbose( +# f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" +# ) +# try: +# start_time, end_time = self._failure_handler_helper_fn( +# exception=exception, +# traceback_exception=traceback_exception, +# start_time=start_time, +# end_time=end_time, +# ) +# callbacks = [] # init this to empty incase it's not created + +# if self.dynamic_failure_callbacks is not None and isinstance( +# self.dynamic_failure_callbacks, list +# ): +# callbacks = self.dynamic_failure_callbacks +# ## keep the internal functions ## +# for callback in litellm.failure_callback: +# if ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# callbacks.append(callback) +# else: +# callbacks = litellm.failure_callback + +# result = None # result sent to all loggers, init this to None incase it's not created + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) +# for callback in callbacks: +# try: +# if callback == "lite_debugger": +# print_verbose("reaches lite_debugger for logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# result = { +# "model": self.model, +# "created": time.time(), +# "error": traceback_exception, +# "usage": { +# "prompt_tokens": prompt_token_calculator( +# self.model, messages=self.messages +# ), +# "completion_tokens": 0, +# }, +# } +# liteDebuggerClient.log_event( +# model=self.model, +# messages=self.messages, +# end_user=self.model_call_details.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=self.litellm_call_id, +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# if callback == "lunary": +# print_verbose("reaches lunary for logging error!") + +# model = self.model + +# input = self.model_call_details["input"] + +# _type = ( +# "embed" +# if self.call_type == CallTypes.embedding.value +# else "llm" +# ) + +# lunaryLogger.log_event( +# type=_type, +# event="error", +# user_id=self.model_call_details.get("user", "default"), +# model=model, +# input=input, +# error=traceback_exception, +# run_id=self.litellm_call_id, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "sentry": +# print_verbose("sending exception to sentry") +# if capture_exception: +# capture_exception(exception) +# else: +# print_verbose( +# f"capture exception not initialized: {capture_exception}" +# ) +# if callable(callback): # custom logger functions +# customLogger.log_event( +# kwargs=self.model_call_details, +# 
response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# if ( +# isinstance(callback, CustomLogger) +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# ): # custom logger class +# callback.log_failure_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=result, +# kwargs=self.model_call_details, +# ) +# if callback == "langfuse": +# global langFuseLogger +# verbose_logger.debug("reaches langfuse for logging failure") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if langFuseLogger is None or ( +# ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# and ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# ): +# langFuseLogger = LangFuseLogger( +# langfuse_public_key=self.langfuse_public_key, +# langfuse_secret=self.langfuse_secret, +# ) +# langFuseLogger.log_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=None, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# status_message=str(exception), +# level="ERROR", +# kwargs=self.model_call_details, +# ) +# if callback == "traceloop": +# traceloopLogger.log_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=None, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# status_message=str(exception), +# level="ERROR", +# kwargs=self.model_call_details, +# ) +# if callback == "prometheus": +# global prometheusLogger +# verbose_logger.debug("reaches prometheus for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# kwargs["exception"] = str(exception) +# prometheusLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) + +# if callback == "logfire": +# global logfireLogger +# verbose_logger.debug("reaches logfire for failure logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# kwargs["exception"] = exception + +# logfireLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# level=LogfireLevel.ERROR.value, +# print_verbose=print_verbose, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" +# ) +# pass + +# async def async_failure_handler( +# self, exception, 
traceback_exception, start_time=None, end_time=None +# ): +# """ +# Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. +# """ +# start_time, end_time = self._failure_handler_helper_fn( +# exception=exception, +# traceback_exception=traceback_exception, +# start_time=start_time, +# end_time=end_time, +# ) +# result = None # result sent to all loggers, init this to None incase it's not created +# for callback in litellm._async_failure_callback: +# try: +# if isinstance(callback, CustomLogger): # custom logger class +# await callback.async_log_failure_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) # type: ignore +# if callable(callback): # custom logger functions +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" +# ) diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py index 9c0df2011..8f270d8be 100644 --- a/litellm/litellm_core_utils/redact_messages.py +++ b/litellm/litellm_core_utils/redact_messages.py @@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any import litellm if TYPE_CHECKING: - from litellm.utils import Logging as _LiteLLMLoggingObject + from litellm.litellm_core_utils.litellm_logging import ( + Logging as _LiteLLMLoggingObject, + ) LiteLLMLoggingObject = _LiteLLMLoggingObject else: diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 8e469a8f4..1edd99110 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -5,7 +5,9 @@ import requests, copy # type: ignore import time from functools import partial from typing import Callable, Optional, List, Union -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +import litellm.litellm_core_utils +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler @@ -201,7 +203,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], @@ -316,7 +318,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/base.py b/litellm/llms/base.py index 8c2f5101e..0222d2366 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -2,7 +2,7 @@ import litellm import httpx, requests from typing import Optional, Union -from litellm.utils import Logging +from litellm.litellm_core_utils.litellm_logging import Logging class BaseLLM: diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 4314032e7..8d88cdd3d 100644 --- 
a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -5,12 +5,10 @@ import time, uuid from typing import Callable, Optional, Any, Union, List import litellm from litellm.utils import ( - ModelResponse, get_secret, - Usage, - ImageResponse, - map_finish_reason, ) +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason +from litellm.types.utils import ImageResponse, ModelResponse, Usage from .prompt_templates.factory import ( prompt_factory, custom_prompt, @@ -633,7 +631,11 @@ def init_bedrock_client( config = boto3.session.Config() ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): oidc_token = get_secret(aws_web_identity_token) if oidc_token is None: @@ -642,9 +644,7 @@ def init_bedrock_client( status_code=401, ) - sts_client = boto3.client( - "sts" - ) + sts_client = boto3.client("sts") # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 84b61d4cb..ffbc6c680 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -22,13 +22,12 @@ from typing import ( from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, - Message, - Choices, get_secret, - Logging, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.litellm_core_utils.litellm_logging import Logging +from litellm.types.utils import Message, Choices import litellm, uuid from .prompt_templates.factory import ( prompt_factory, @@ -57,6 +56,7 @@ from litellm.caching import DualCache iam_cache = DualCache() + class AmazonCohereChatConfig: """ Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html @@ -327,13 +327,19 @@ class BedrockLLM(BaseLLM): ) = params_to_check ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: - iam_creds_cache_key = json.dumps({ - "aws_web_identity_token": aws_web_identity_token, - "aws_role_name": aws_role_name, - "aws_session_name": aws_session_name, - "aws_region_name": aws_region_name, - }) + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): + iam_creds_cache_key = json.dumps( + { + "aws_web_identity_token": aws_web_identity_token, + "aws_role_name": aws_role_name, + "aws_session_name": aws_session_name, + "aws_region_name": aws_region_name, + } + ) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) if iam_creds_dict is None: @@ -348,7 +354,7 @@ class BedrockLLM(BaseLLM): sts_client = boto3.client( "sts", region_name=aws_region_name, - endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" + endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com", ) # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html @@ -362,12 +368,18 @@ class BedrockLLM(BaseLLM): iam_creds_dict = { "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], - "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], + "aws_secret_access_key": sts_response["Credentials"][ + "SecretAccessKey" + ], "aws_session_token": 
sts_response["Credentials"]["SessionToken"], "region_name": aws_region_name, } - iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60) + iam_cache.set_cache( + key=iam_creds_cache_key, + value=json.dumps(iam_creds_dict), + ttl=3600 - 60, + ) session = boto3.Session(**iam_creds_dict) @@ -1433,13 +1445,19 @@ class BedrockConverseLLM(BaseLLM): ) = params_to_check ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: - iam_creds_cache_key = json.dumps({ - "aws_web_identity_token": aws_web_identity_token, - "aws_role_name": aws_role_name, - "aws_session_name": aws_session_name, - "aws_region_name": aws_region_name, - }) + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): + iam_creds_cache_key = json.dumps( + { + "aws_web_identity_token": aws_web_identity_token, + "aws_role_name": aws_role_name, + "aws_session_name": aws_session_name, + "aws_region_name": aws_region_name, + } + ) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) if iam_creds_dict is None: @@ -1454,7 +1472,7 @@ class BedrockConverseLLM(BaseLLM): sts_client = boto3.client( "sts", region_name=aws_region_name, - endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" + endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com", ) # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html @@ -1468,12 +1486,18 @@ class BedrockConverseLLM(BaseLLM): iam_creds_dict = { "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], - "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], + "aws_secret_access_key": sts_response["Credentials"][ + "SecretAccessKey" + ], "aws_session_token": sts_response["Credentials"]["SessionToken"], "region_name": aws_region_name, } - iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60) + iam_cache.set_cache( + key=iam_creds_cache_key, + value=json.dumps(iam_creds_dict), + ttl=3600 - 60, + ) session = boto3.Session(**iam_creds_dict) diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 4fe475259..1ab09246b 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, EmbeddingResponse, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py index 66c28acee..8ad294457 100644 --- a/litellm/llms/predibase.py +++ b/litellm/llms/predibase.py @@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, Message, Choices, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, 
custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler @@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/triton.py b/litellm/llms/triton.py index 711186b3f..d647c9c43 100644 --- a/litellm/llms/triton.py +++ b/litellm/llms/triton.py @@ -4,7 +4,6 @@ from enum import Enum import requests, copy # type: ignore import time from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index 67a8a4519..28cdde518 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -5,7 +5,8 @@ import requests # type: ignore import time from typing import Callable, Optional, Union, List, Literal, Any from pydantic import BaseModel -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.types.llms.vertex_ai import * diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 065294280..1907ad5f0 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -6,7 +6,8 @@ from enum import Enum import requests, copy # type: ignore import time, uuid from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from .prompt_templates.factory import ( diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index b1c38f0bc..c9e48f3e1 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -8,7 +8,10 @@ from enum import Enum import requests # type: ignore import time from typing import Callable, Optional, Union, List, Any, Tuple -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -320,7 +323,7 @@ class VertexLLM(BaseLLM): model: str, response: httpx.Response, model_response: ModelResponse, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index ea2d5d3f8..20fa90cbe 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -12,6 +12,8 @@ import litellm import 
backoff import traceback from pydantic import BaseModel +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging from litellm.proxy._types import ( UserAPIKeyAuth, DynamoDBArgs, @@ -331,7 +333,9 @@ class ProxyLogging: return data except Exception as e: if "litellm_logging_obj" in data: - logging_obj: litellm.utils.Logging = data["litellm_logging_obj"] + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[ + "litellm_logging_obj" + ] ## ASYNC FAILURE HANDLER ## error_message = "" diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 1fbb375d3..312ca210e 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -3,6 +3,15 @@ from typing_extensions import TypedDict from enum import Enum from typing_extensions import override, Required, Dict from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk +from ..litellm_core_utils.model_response_helpers import map_finish_reason +from openai._models import BaseModel as OpenAIObject +from pydantic import ConfigDict +import uuid +import json + + +def _generate_id(): # private helper function + return "chatcmpl-" + str(uuid.uuid4()) class LiteLLMCommonStrings(Enum): @@ -48,3 +57,904 @@ class GenericStreamingChunk(TypedDict): finish_reason: Required[str] usage: Optional[ChatCompletionUsageBlock] index: int + + +from enum import Enum + + +class CallTypes(Enum): + embedding = "embedding" + aembedding = "aembedding" + completion = "completion" + acompletion = "acompletion" + atext_completion = "atext_completion" + text_completion = "text_completion" + image_generation = "image_generation" + aimage_generation = "aimage_generation" + moderation = "moderation" + amoderation = "amoderation" + atranscription = "atranscription" + transcription = "transcription" + aspeech = "aspeech" + speech = "speech" + + +class TopLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. 
+ """ + + +class ChoiceLogprobs(OpenAIObject): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class FunctionCall(OpenAIObject): + arguments: str + name: Optional[str] = None + + +class Function(OpenAIObject): + arguments: str + name: Optional[str] = None + + def __init__( + self, + arguments: Union[Dict, str], + name: Optional[str] = None, + **params, + ): + if isinstance(arguments, Dict): + arguments = json.dumps(arguments) + else: + arguments = arguments + + name = name + + # Build a dictionary with the structure your BaseModel expects + data = {"arguments": arguments, "name": name, **params} + + super(Function, self).__init__(**data) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ChatCompletionDeltaToolCall(OpenAIObject): + id: Optional[str] = None + function: Function + type: Optional[str] = None + index: int + + +class HiddenParams(OpenAIObject): + original_response: Optional[str] = None + model_id: Optional[str] = None # used in Router for individual deployments + api_base: Optional[str] = None # returns api base used for making completion call + + model_config = ConfigDict(extra="allow", protected_namespaces=()) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ChatCompletionMessageToolCall(OpenAIObject): + def __init__( + self, + function: Union[Dict, Function], + id: Optional[str] = None, + type: Optional[str] = None, + **params, + ): + super(ChatCompletionMessageToolCall, self).__init__(**params) + if isinstance(function, Dict): + self.function = Function(**function) + else: + self.function = function + + if id is not None: + self.id = id + else: + self.id = f"{uuid.uuid4()}" + + if type is not None: + self.type = type + else: + self.type = "function" + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Message(OpenAIObject): + def __init__( + self, + content: Optional[str] = "default", + role="assistant", + logprobs=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Message, self).__init__(**params) + self.content = content + self.role = role + if function_call is 
not None: + self.function_call = FunctionCall(**function_call) + + if tool_calls is not None: + self.tool_calls = [] + for tool_call in tool_calls: + self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) + + if logprobs is not None: + self._logprobs = ChoiceLogprobs(**logprobs) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Delta(OpenAIObject): + def __init__( + self, + content=None, + role=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Delta, self).__init__(**params) + self.content = content + self.role = role + + if function_call is not None and isinstance(function_call, dict): + self.function_call = FunctionCall(**function_call) + else: + self.function_call = function_call + if tool_calls is not None and isinstance(tool_calls, list): + self.tool_calls = [] + for tool_call in tool_calls: + if isinstance(tool_call, dict): + if tool_call.get("index", None) is None: + tool_call["index"] = 0 + self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + elif isinstance(tool_call, ChatCompletionDeltaToolCall): + self.tool_calls.append(tool_call) + else: + self.tool_calls = tool_calls + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Choices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + message: Optional[Union[Message, dict]] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(Choices, self).__init__(**params) + if finish_reason is not None: + self.finish_reason = map_finish_reason( + finish_reason + ) # set finish_reason for all responses + else: + self.finish_reason = "stop" + self.index = index + if message is None: + self.message = Message() + else: + if isinstance(message, Message): + self.message = message + elif isinstance(message, dict): + self.message = Message(**message) + if logprobs is not None: + self.logprobs = logprobs + if enhancements is not None: + self.enhancements = enhancements + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Usage(OpenAIObject): + def __init__( + self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params + ): + super(Usage, 
self).__init__(**params) + if prompt_tokens: + self.prompt_tokens = prompt_tokens + if completion_tokens: + self.completion_tokens = completion_tokens + if total_tokens: + self.total_tokens = total_tokens + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class StreamingChoices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + delta: Optional[Delta] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(StreamingChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = finish_reason + else: + self.finish_reason = None + self.index = index + if delta is not None: + if isinstance(delta, Delta): + self.delta = delta + elif isinstance(delta, dict): + self.delta = Delta(**delta) + else: + self.delta = Delta() + if enhancements is not None: + self.enhancements = enhancements + + if logprobs is not None and isinstance(logprobs, dict): + self.logprobs = ChoiceLogprobs(**logprobs) + else: + self.logprobs = logprobs # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ModelResponse(OpenAIObject): + id: str + """A unique identifier for the completion.""" + + choices: List[Union[Choices, StreamingChoices]] + """The list of completion choices the model generated for the input prompt.""" + + created: int + """The Unix timestamp (in seconds) of when the completion was created.""" + + model: Optional[str] = None + """The model used for completion.""" + + object: str + """The object type, which is always "text_completion" """ + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. 
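# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: how the relocated ModelResponse
# defined above behaves once it lives in litellm.types.utils (the import path
# this diff introduces). Plain dicts passed for `choices` / `usage` are coerced
# into Choices / Message / Usage objects, a "chatcmpl-<uuid>" id is generated
# when none is given, and provider-specific finish reasons are normalized.
# Model name, message content, and timestamp below are made-up example values.
from litellm.types.utils import ModelResponse

resp = ModelResponse(
    model="gpt-3.5-turbo",
    created=1718000000,  # example timestamp, passed explicitly
    choices=[
        {
            "finish_reason": "end_turn",  # anthropic-style -> normalized to "stop"
            "index": 0,
            "message": {"role": "assistant", "content": "Hello!"},
        }
    ],
    usage={"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
)
assert resp.object == "chat.completion"
assert resp.choices[0].finish_reason == "stop"
# these OpenAIObject subclasses also allow dict-style access:
assert resp["choices"][0]["message"].get("content") == "Hello!"
# ---------------------------------------------------------------------------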
+ """ + + _hidden_params: dict = {} + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + object=None, + system_fingerprint=None, + usage=None, + stream=None, + stream_options=None, + response_ms=None, + hidden_params=None, + **params, + ): + if stream is not None and stream is True: + object = "chat.completion.chunk" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [StreamingChoices()] + else: + object = "chat.completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, Choices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = Choices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [Choices()] + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + model = model + if usage is not None: + if isinstance(usage, dict): + usage = Usage(**usage) + else: + usage = usage + elif stream is None or stream is False: + usage = Usage() + if hidden_params: + self._hidden_params = hidden_params + + init_values = { + "id": id, + "choices": choices, + "created": created, + "model": model, + "object": object, + "system_fingerprint": system_fingerprint, + } + + if usage is not None: + init_values["usage"] = usage + + super().__init__( + **init_values, + **params, + ) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Embedding(OpenAIObject): + embedding: Union[list, str] = [] + index: int + object: str + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class EmbeddingResponse(OpenAIObject): + model: Optional[str] = None + """The model used for embedding.""" + + data: Optional[List] = None + """The actual embedding value""" + + object: str + """The object type, which is always "embedding" """ + + usage: Optional[Usage] = None + """Usage statistics for the embedding request.""" + + _hidden_params: dict = {} + + def __init__( + self, + model=None, + usage=None, + stream=False, + response_ms=None, + data=None, + **params, + ): + object = "list" + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if usage: + usage = usage + else: + usage = Usage() + + model = model + 
super().__init__(model=model, object=object, data=data, usage=usage) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Logprobs(OpenAIObject): + text_offset: List[int] + token_logprobs: List[float] + tokens: List[str] + top_logprobs: List[Dict[str, float]] + + +class TextChoices(OpenAIObject): + def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): + super(TextChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = map_finish_reason(finish_reason) + else: + self.finish_reason = None + self.index = index + if text is not None: + self.text = text + else: + self.text = None + if logprobs is None: + self.logprobs = None + else: + if isinstance(logprobs, dict): + self.logprobs = Logprobs(**logprobs) + else: + self.logprobs = logprobs + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TextCompletionResponse(OpenAIObject): + """ + { + "id": response["id"], + "object": "text_completion", + "created": response["created"], + "model": response["model"], + "choices": [ + { + "text": response["choices"][0]["message"]["content"], + "index": response["choices"][0]["index"], + "logprobs": transformed_logprobs, + "finish_reason": response["choices"][0]["finish_reason"] + } + ], + "usage": response["usage"] + } + """ + + id: str + object: str + created: int + model: Optional[str] + choices: List[TextChoices] + usage: Optional[Usage] + _response_ms: Optional[int] = None + _hidden_params: HiddenParams + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + usage=None, + stream=False, + response_ms=None, + object=None, + **params, + ): + if stream: + object = "text_completion.chunk" + choices = [TextChoices()] + else: + object = "text_completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, TextChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = TextChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [TextChoices()] + if object is not None: + object = object + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + + model = model + if usage: + usage = usage + else: + usage = Usage() + + super(TextCompletionResponse, 
self).__init__( + id=id, + object=object, + created=created, + model=model, + choices=choices, + usage=usage, + **params, + ) + + if response_ms: + self._response_ms = response_ms + else: + self._response_ms = None + self._hidden_params = HiddenParams() + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ImageObject(OpenAIObject): + """ + Represents the url or the content of an image generated by the OpenAI API. + + Attributes: + b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. + url: The URL of the generated image, if response_format is url (default). + revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. + + https://platform.openai.com/docs/api-reference/images/object + """ + + b64_json: Optional[str] = None + url: Optional[str] = None + revised_prompt: Optional[str] = None + + def __init__(self, b64_json=None, url=None, revised_prompt=None): + super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ImageResponse(OpenAIObject): + created: Optional[int] = None + + data: Optional[List[ImageObject]] = None + + usage: Optional[dict] = None + + _hidden_params: dict = {} + + def __init__(self, created=None, data=None, response_ms=None): + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if created: + created = created + else: + created = None + + super().__init__(data=data, created=created) + self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TranscriptionResponse(OpenAIObject): + text: Optional[str] = None + + _hidden_params: dict = {} + + def __init__(self, text=None): + super().__init__(text=text) + + def __contains__(self, key): + # Define custom 
behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() diff --git a/litellm/utils.py b/litellm/utils.py index 7f37bcf7c..a126a10cd 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -18,7 +18,7 @@ from functools import wraps, lru_cache import datetime, time import tiktoken import uuid -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel import aiohttp import textwrap import logging @@ -32,9 +32,29 @@ from dataclasses import ( ) import os import litellm._service_logger # for storing API inputs, outputs, and metadata +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.caching import DualCache -from litellm.types.utils import CostPerToken, ProviderField, ModelInfo +from litellm.types.utils import ( + CostPerToken, + ProviderField, + ModelInfo, + CallTypes, + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, + ChatCompletionDeltaToolCall, + Message, + Delta, + Choices, + Usage, + StreamingChoices, + Embedding, + TextChoices, +) from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_logging, ) @@ -96,7 +116,6 @@ from .integrations.greenscale import GreenscaleLogger from .integrations.litedebugger import LiteDebugger from .proxy._types import KeyManagementSystem from openai import OpenAIError as OriginalError -from openai._models import BaseModel as OpenAIObject from .caching import S3Cache, RedisSemanticCache, RedisCache from .exceptions import ( AuthenticationError, @@ -179,6 +198,8 @@ local_cache: Optional[Dict[str, str]] = {} last_fetched_at = None last_fetched_at_keys = None ######## Model Response ######################### + + # All liteLLM Model responses will be in this format, Follows the OpenAI Format # https://docs.litellm.ai/docs/completion/output # { @@ -209,933 +230,6 @@ class UnsupportedParamsError(Exception): ) # Call the base class constructor with the parameters it needs -def _generate_id(): # private helper function - return "chatcmpl-" + str(uuid.uuid4()) - - -def map_finish_reason( - finish_reason: str, -): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' - # anthropic mapping - if finish_reason == "stop_sequence": - return "stop" - # cohere mapping - https://docs.cohere.com/reference/generate - elif finish_reason == "COMPLETE": - return "stop" - elif finish_reason == "MAX_TOKENS": # cohere + vertex ai - return "length" - elif finish_reason == "ERROR_TOXIC": - return "content_filter" - elif ( - finish_reason == "ERROR" - ): # openai currently doesn't support an 'error' finish reason - return "stop" - # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream - elif finish_reason == "eos_token" or finish_reason == "stop_sequence": - return "stop" - elif ( - finish_reason == "FINISH_REASON_UNSPECIFIED" 
or finish_reason == "STOP" - ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] - return "stop" - elif finish_reason == "SAFETY": # vertex ai - return "content_filter" - elif finish_reason == "STOP": # vertex ai - return "stop" - elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic - return "stop" - elif finish_reason == "max_tokens": # anthropic - return "length" - elif finish_reason == "tool_use": # anthropic - return "tool_calls" - elif finish_reason == "content_filtered": - return "content_filter" - return finish_reason - - -class TopLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. - - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - -class ChatCompletionTokenLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. - - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - top_logprobs: List[TopLogprob] - """List of the most likely tokens and their log probability, at this token - position. - - In rare cases, there may be fewer than the number of requested `top_logprobs` - returned. 
- """ - - -class ChoiceLogprobs(OpenAIObject): - content: Optional[List[ChatCompletionTokenLogprob]] = None - """A list of message content tokens with log probability information.""" - - -class FunctionCall(OpenAIObject): - arguments: str - name: Optional[str] = None - - -class Function(OpenAIObject): - arguments: str - name: Optional[str] = None - - def __init__( - self, - arguments: Union[Dict, str], - name: Optional[str] = None, - **params, - ): - if isinstance(arguments, Dict): - arguments = json.dumps(arguments) - else: - arguments = arguments - - name = name - - # Build a dictionary with the structure your BaseModel expects - data = {"arguments": arguments, "name": name, **params} - - super(Function, self).__init__(**data) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ChatCompletionDeltaToolCall(OpenAIObject): - id: Optional[str] = None - function: Function - type: Optional[str] = None - index: int - - -class HiddenParams(OpenAIObject): - original_response: Optional[str] = None - model_id: Optional[str] = None # used in Router for individual deployments - api_base: Optional[str] = None # returns api base used for making completion call - - model_config = ConfigDict(extra="allow", protected_namespaces=()) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ChatCompletionMessageToolCall(OpenAIObject): - def __init__( - self, - function: Union[Dict, Function], - id: Optional[str] = None, - type: Optional[str] = None, - **params, - ): - super(ChatCompletionMessageToolCall, self).__init__(**params) - if isinstance(function, Dict): - self.function = Function(**function) - else: - self.function = function - - if id is not None: - self.id = id - else: - self.id = f"{uuid.uuid4()}" - - if type is not None: - self.type = type - else: - self.type = "function" - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Message(OpenAIObject): - def __init__( - self, - content: Optional[str] = "default", - role="assistant", - logprobs=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Message, self).__init__(**params) - self.content = content - self.role = role - if function_call is 
not None: - self.function_call = FunctionCall(**function_call) - - if tool_calls is not None: - self.tool_calls = [] - for tool_call in tool_calls: - self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) - - if logprobs is not None: - self._logprobs = ChoiceLogprobs(**logprobs) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Delta(OpenAIObject): - def __init__( - self, - content=None, - role=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Delta, self).__init__(**params) - self.content = content - self.role = role - - if function_call is not None and isinstance(function_call, dict): - self.function_call = FunctionCall(**function_call) - else: - self.function_call = function_call - if tool_calls is not None and isinstance(tool_calls, list): - self.tool_calls = [] - for tool_call in tool_calls: - if isinstance(tool_call, dict): - if tool_call.get("index", None) is None: - tool_call["index"] = 0 - self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) - elif isinstance(tool_call, ChatCompletionDeltaToolCall): - self.tool_calls.append(tool_call) - else: - self.tool_calls = tool_calls - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Choices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - message: Optional[Union[Message, dict]] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(Choices, self).__init__(**params) - if finish_reason is not None: - self.finish_reason = map_finish_reason( - finish_reason - ) # set finish_reason for all responses - else: - self.finish_reason = "stop" - self.index = index - if message is None: - self.message = Message() - else: - if isinstance(message, Message): - self.message = message - elif isinstance(message, dict): - self.message = Message(**message) - if logprobs is not None: - self.logprobs = logprobs - if enhancements is not None: - self.enhancements = enhancements - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Usage(OpenAIObject): - def __init__( - self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params - ): - super(Usage, 
self).__init__(**params) - if prompt_tokens: - self.prompt_tokens = prompt_tokens - if completion_tokens: - self.completion_tokens = completion_tokens - if total_tokens: - self.total_tokens = total_tokens - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class StreamingChoices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - delta: Optional[Delta] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(StreamingChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = finish_reason - else: - self.finish_reason = None - self.index = index - if delta is not None: - if isinstance(delta, Delta): - self.delta = delta - elif isinstance(delta, dict): - self.delta = Delta(**delta) - else: - self.delta = Delta() - if enhancements is not None: - self.enhancements = enhancements - - if logprobs is not None and isinstance(logprobs, dict): - self.logprobs = ChoiceLogprobs(**logprobs) - else: - self.logprobs = logprobs # type: ignore - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ModelResponse(OpenAIObject): - id: str - """A unique identifier for the completion.""" - - choices: List[Union[Choices, StreamingChoices]] - """The list of completion choices the model generated for the input prompt.""" - - created: int - """The Unix timestamp (in seconds) of when the completion was created.""" - - model: Optional[str] = None - """The model used for completion.""" - - object: str - """The object type, which is always "text_completion" """ - - system_fingerprint: Optional[str] = None - """This fingerprint represents the backend configuration that the model runs with. - - Can be used in conjunction with the `seed` request parameter to understand when - backend changes have been made that might impact determinism. 
- """ - - _hidden_params: dict = {} - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - object=None, - system_fingerprint=None, - usage=None, - stream=None, - stream_options=None, - response_ms=None, - hidden_params=None, - **params, - ): - if stream is not None and stream == True: - object = "chat.completion.chunk" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, StreamingChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = StreamingChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [StreamingChoices()] - else: - if model in litellm.open_ai_embedding_models: - object = "embedding" - else: - object = "chat.completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, Choices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = Choices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [Choices()] - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - model = model - if usage is not None: - if isinstance(usage, dict): - usage = Usage(**usage) - else: - usage = usage - elif stream is None or stream == False: - usage = Usage() - if hidden_params: - self._hidden_params = hidden_params - - init_values = { - "id": id, - "choices": choices, - "created": created, - "model": model, - "object": object, - "system_fingerprint": system_fingerprint, - } - - if usage is not None: - init_values["usage"] = usage - - super().__init__( - **init_values, - **params, - ) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Embedding(OpenAIObject): - embedding: Union[list, str] = [] - index: int - object: str - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class EmbeddingResponse(OpenAIObject): - model: Optional[str] = None - """The model used for embedding.""" - - data: Optional[List] = None - """The actual embedding value""" - - object: str - """The object type, which is always "embedding" """ - - usage: Optional[Usage] = None - """Usage statistics for the embedding request.""" - - _hidden_params: dict = {} - - def __init__( - self, - model=None, - usage=None, - stream=False, - response_ms=None, - data=None, - **params, - ): - object = "list" - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if usage: - usage = 
usage - else: - usage = Usage() - - model = model - super().__init__(model=model, object=object, data=data, usage=usage) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Logprobs(OpenAIObject): - text_offset: List[int] - token_logprobs: List[float] - tokens: List[str] - top_logprobs: List[Dict[str, float]] - - -class TextChoices(OpenAIObject): - def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): - super(TextChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = map_finish_reason(finish_reason) - else: - self.finish_reason = None - self.index = index - if text is not None: - self.text = text - else: - self.text = None - if logprobs is None: - self.logprobs = None - else: - if isinstance(logprobs, dict): - self.logprobs = Logprobs(**logprobs) - else: - self.logprobs = logprobs - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TextCompletionResponse(OpenAIObject): - """ - { - "id": response["id"], - "object": "text_completion", - "created": response["created"], - "model": response["model"], - "choices": [ - { - "text": response["choices"][0]["message"]["content"], - "index": response["choices"][0]["index"], - "logprobs": transformed_logprobs, - "finish_reason": response["choices"][0]["finish_reason"] - } - ], - "usage": response["usage"] - } - """ - - id: str - object: str - created: int - model: Optional[str] - choices: List[TextChoices] - usage: Optional[Usage] - _response_ms: Optional[int] = None - _hidden_params: HiddenParams - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - usage=None, - stream=False, - response_ms=None, - object=None, - **params, - ): - if stream: - object = "text_completion.chunk" - choices = [TextChoices()] - else: - object = "text_completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, TextChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = TextChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [TextChoices()] - if object is not None: - object = object - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - - model = model - if usage: - usage = usage - else: - usage = 
Usage() - - super(TextCompletionResponse, self).__init__( - id=id, - object=object, - created=created, - model=model, - choices=choices, - usage=usage, - **params, - ) - - if response_ms: - self._response_ms = response_ms - else: - self._response_ms = None - self._hidden_params = HiddenParams() - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ImageObject(OpenAIObject): - """ - Represents the url or the content of an image generated by the OpenAI API. - - Attributes: - b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. - url: The URL of the generated image, if response_format is url (default). - revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. - - https://platform.openai.com/docs/api-reference/images/object - """ - - b64_json: Optional[str] = None - url: Optional[str] = None - revised_prompt: Optional[str] = None - - def __init__(self, b64_json=None, url=None, revised_prompt=None): - super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ImageResponse(OpenAIObject): - created: Optional[int] = None - - data: Optional[List[ImageObject]] = None - - usage: Optional[dict] = None - - _hidden_params: dict = {} - - def __init__(self, created=None, data=None, response_ms=None): - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if created: - created = created - else: - created = None - - super().__init__(data=data, created=created) - self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TranscriptionResponse(OpenAIObject): - text: Optional[str] = None - - _hidden_params: dict = {} - - def __init__(self, text=None): - super().__init__(text=text) - - def 
__contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - ############################################################ def print_verbose( print_statement, @@ -1156,1602 +250,6 @@ def print_verbose( ####### LOGGING ################### -from enum import Enum - - -class CallTypes(Enum): - embedding = "embedding" - aembedding = "aembedding" - completion = "completion" - acompletion = "acompletion" - atext_completion = "atext_completion" - text_completion = "text_completion" - image_generation = "image_generation" - aimage_generation = "aimage_generation" - moderation = "moderation" - amoderation = "amoderation" - atranscription = "atranscription" - transcription = "transcription" - aspeech = "aspeech" - speech = "speech" - - -# Logging function -> log the exact model details + what's being sent | Non-BlockingP -class Logging: - global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger - - custom_pricing: bool = False - stream_options = None - - def __init__( - self, - model, - messages, - stream, - call_type, - start_time, - litellm_call_id, - function_id, - dynamic_success_callbacks=None, - dynamic_failure_callbacks=None, - dynamic_async_success_callbacks=None, - langfuse_public_key=None, - langfuse_secret=None, - ): - if call_type not in [item.value for item in CallTypes]: - allowed_values = ", ".join([item.value for item in CallTypes]) - raise ValueError( - f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}" - ) - if messages is not None: - if isinstance(messages, str): - messages = [ - {"role": "user", "content": messages} - ] # convert text completion input to the chat completion format - elif ( - isinstance(messages, list) - and len(messages) > 0 - and isinstance(messages[0], str) - ): - new_messages = [] - for m in messages: - new_messages.append({"role": "user", "content": m}) - messages = new_messages - self.model = model - self.messages = messages - self.stream = stream - self.start_time = start_time # log the call start time - self.call_type = call_type - self.litellm_call_id = litellm_call_id - self.function_id = function_id - self.streaming_chunks = [] # for generating complete stream response - self.sync_streaming_chunks = [] # for generating complete stream response - self.model_call_details = {} - self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call - self.dynamic_failure_callbacks = dynamic_failure_callbacks - self.dynamic_success_callbacks = ( - dynamic_success_callbacks # callbacks set for just that call - ) - self.dynamic_async_success_callbacks = ( - dynamic_async_success_callbacks # callbacks set for just that call - ) - ## DYNAMIC LANGFUSE KEYS ## - self.langfuse_public_key = langfuse_public_key - self.langfuse_secret = langfuse_secret - ## TIME TO FIRST TOKEN LOGGING ## - self.completion_start_time: Optional[datetime.datetime] = None - - def update_environment_variables( - self, model, user, optional_params, litellm_params, **additional_params - ): - self.optional_params = optional_params - self.model = model - self.user = user - self.litellm_params = litellm_params - self.logger_fn = litellm_params.get("logger_fn", None) - print_verbose(f"self.optional_params: {self.optional_params}") - - self.model_call_details = { - "model": self.model, - "messages": self.messages, - "optional_params": self.optional_params, - "litellm_params": self.litellm_params, - "start_time": self.start_time, - "stream": self.stream, - "user": user, - "call_type": str(self.call_type), - "litellm_call_id": self.litellm_call_id, - "completion_start_time": self.completion_start_time, - **self.optional_params, - **additional_params, - } - - ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation - if "stream_options" in additional_params: - self.stream_options = additional_params["stream_options"] - ## check if custom pricing set ## - if ( - litellm_params.get("input_cost_per_token") is not None - or litellm_params.get("input_cost_per_second") is not None - or litellm_params.get("output_cost_per_token") is not None - or litellm_params.get("output_cost_per_second") is not None - ): - self.custom_pricing = True - - def _pre_call(self, input, api_key, model=None, additional_args={}): - """ - Common helper function across the sync + async pre-call function - """ - # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "pre_api_call" - if ( - model - ): # if model name was changes pre-call, overwrite the initial model call name with the new one - self.model_call_details["model"] = model - - def pre_call(self, input, api_key, model=None, additional_args={}): - # Log the exact input to the LLM API - litellm.error_logs["PRE_CALL"] = locals() - try: - self._pre_call( - input=input, - 
api_key=api_key, - model=model, - additional_args=additional_args, - ) - - # User Logging -> if you pass in a custom logging function - headers = additional_args.get("headers", {}) - if headers is None: - headers = {} - data = additional_args.get("complete_input_dict", {}) - api_base = additional_args.get("api_base", "") - self.model_call_details["litellm_params"]["api_base"] = str( - api_base - ) # used for alerting - masked_headers = { - k: ( - (v[:-44] + "*" * 44) - if (isinstance(v, str) and len(v) > 44) - else "*****" - ) - for k, v in headers.items() - } - formatted_headers = " ".join( - [f"-H '{k}: {v}'" for k, v in masked_headers.items()] - ) - - verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") - - curl_command = "\n\nPOST Request Sent from LiteLLM:\n" - curl_command += "curl -X POST \\\n" - curl_command += f"{api_base} \\\n" - curl_command += ( - f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" - ) - curl_command += f"-d '{str(data)}'\n" - if additional_args.get("request_str", None) is not None: - # print the sagemaker / bedrock client request - curl_command = "\nRequest Sent from LiteLLM:\n" - curl_command += additional_args.get("request_str", None) - elif api_base == "": - curl_command = self.model_call_details - - # only print verbose if verbose logger is not set - if verbose_logger.level == 0: - # this means verbose logger was not switched on - user is in litellm.set_verbose=True - print_verbose(f"\033[92m{curl_command}\033[0m\n") - - if litellm.json_logs: - verbose_logger.debug( - "POST Request Sent from LiteLLM", - extra={"api_base": {api_base}, **masked_headers}, - ) - else: - verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") - # log raw request to provider (like LangFuse) -- if opted in. - if litellm.log_raw_request_response is True: - try: - # [Non-blocking Extra Debug Information in metadata] - _litellm_params = self.model_call_details.get("litellm_params", {}) - _metadata = _litellm_params.get("metadata", {}) or {} - if ( - litellm.turn_off_message_logging is not None - and litellm.turn_off_message_logging is True - ): - _metadata["raw_request"] = ( - "redacted by litellm. 
\ - 'litellm.turn_off_message_logging=True'" - ) - else: - _metadata["raw_request"] = str(curl_command) - except Exception as e: - _metadata["raw_request"] = ( - "Unable to Log \ - raw request: {}".format( - str(e) - ) - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "supabase": - print_verbose("reaches supabase for logging!") - model = self.model_call_details["model"] - messages = self.model_call_details["input"] - print_verbose(f"supabaseClient: {supabaseClient}") - supabaseClient.input_log_event( - model=model, - messages=messages, - end_user=self.model_call_details.get("user", "default"), - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - ) - elif callback == "sentry" and add_breadcrumb: - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details pre-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_pre_api_call( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - ) - elif callable(callback): # custom logger functions - customLogger.log_input_event( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - verbose_logger.error( - "litellm.Logging.pre_call(): Exception occured - {}".format( - str(e) - ) - ) - verbose_logger.debug( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - - def post_call( - self, original_response, input=None, api_key=None, additional_args={} - ): - # Log the exact result from the LLM API, for streaming - log the type of response received - litellm.error_logs["POST_CALL"] = locals() - if isinstance(original_response, dict): - original_response = json.dumps(original_response) - try: - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["original_response"] = original_response - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "post_api_call" - # User Logging -> if you 
pass in a custom logging function - print_verbose( - f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n", - log_level="DEBUG", - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - original_response = redact_message_input_output_from_logging( - litellm_logging_obj=self, result=original_response - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches litedebugger for post-call logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - liteDebuggerClient.post_call_log_event( - original_response=original_response, - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - elif callback == "sentry" and add_breadcrumb: - print_verbose("reaches sentry breadcrumbing") - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details post-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_post_api_call( - kwargs=self.model_call_details, - response_obj=None, - start_time=self.start_time, - end_time=None, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - pass - - def _success_handler_helper_fn( - self, result=None, start_time=None, end_time=None, cache_hit=None - ): - try: - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - if self.completion_start_time is None: - self.completion_start_time = end_time - self.model_call_details["completion_start_time"] = ( - self.completion_start_time - ) - self.model_call_details["log_event_type"] = "successful_api_call" - self.model_call_details["end_time"] = end_time - self.model_call_details["cache_hit"] = cache_hit - ## if model in model cost map - log the response cost - ## else set cost to None - verbose_logger.debug(f"Model={self.model};") - if ( - result is not None - and ( - isinstance(result, ModelResponse) - or isinstance(result, EmbeddingResponse) - or isinstance(result, ImageResponse) - or isinstance(result, TranscriptionResponse) - or isinstance(result, TextCompletionResponse) - ) - and self.stream != True - ): # handle streaming separately - self.model_call_details["response_cost"] = ( - 
litellm.response_cost_calculator( - response_object=result, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - else: # streaming chunks + image gen. - self.model_call_details["response_cost"] = None - - if ( - litellm.max_budget - and self.stream == False - and result is not None - and "content" in result - ): - time_diff = (end_time - start_time).total_seconds() - float_diff = float(time_diff) - litellm._current_cost += litellm.completion_cost( - model=self.model, - prompt="", - completion=result["content"], - total_time=float_diff, - ) - - return start_time, end_time, result - except Exception as e: - raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") - - def success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, - end_time=end_time, - result=result, - cache_hit=cache_hit, - ) - # print(f"original response in success handler: {self.model_call_details['original_response']}") - try: - print_verbose(f"success callbacks: {litellm.success_callback}") - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream and isinstance(result, ModelResponse): - if ( - result.choices[0].finish_reason is not None - ): # if it's the last chunk - self.sync_streaming_chunks.append(result) - # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.sync_streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.sync_streaming_chunks.append(result) - - if complete_streaming_response is not None: - print_verbose( - f"Logging Details LiteLLM-Success Call streaming complete" - ) - self.model_call_details["complete_streaming_response"] = ( - complete_streaming_response - ) - self.model_call_details["response_cost"] = ( - litellm.response_cost_calculator( - response_object=complete_streaming_response, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - if self.dynamic_success_callbacks is not None and isinstance( - self.dynamic_success_callbacks, list - ): - callbacks = self.dynamic_success_callbacks - ## keep the internal functions ## - for callback in litellm.success_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - 
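# A minimal sketch of the two success-callback styles the dispatch loop below
# handles, assuming the usual `litellm.integrations.custom_logger.CustomLogger`
# import path; class, function, and argument names here are illustrative only.
import litellm
from litellm.integrations.custom_logger import CustomLogger  # assumed import path


class UsageLogger(CustomLogger):
    # Class-style callback: the loop calls log_success_event() with the request
    # kwargs (model_call_details) and the final, or rebuilt streaming, response.
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(kwargs.get("model"), kwargs.get("response_cost"))


def on_success(kwargs, completion_response, start_time, end_time):
    # Function-style callback: routed through customLogger.log_event(...,
    # callback_func=on_success) by the same loop.
    print("finished:", kwargs.get("litellm_call_id"))


litellm.success_callback = [UsageLogger(), on_success]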
- for callback in callbacks: - try: - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - print_verbose( - f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" - ) - liteDebuggerClient.log_event( - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "promptlayer": - print_verbose("reaches promptlayer for logging!") - promptLayerLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "supabase": - print_verbose("reaches supabase for logging!") - kwargs = self.model_call_details - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches supabase for streaming logging!") - result = kwargs["complete_streaming_response"] - - model = kwargs["model"] - messages = kwargs["messages"] - optional_params = kwargs.get("optional_params", {}) - litellm_params = kwargs.get("litellm_params", {}) - supabaseClient.log_event( - model=model, - messages=messages, - end_user=optional_params.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=litellm_params.get( - "litellm_call_id", str(uuid.uuid4()) - ), - print_verbose=print_verbose, - ) - if callback == "wandb": - print_verbose("reaches wandb for logging!") - weightsBiasesLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langsmith": - print_verbose("reaches langsmith for logging!") - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose( - "reaches langsmith for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - langsmithLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches logfire for streaming logging!") - result = kwargs["complete_streaming_response"] - - logfireLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - level=LogfireLevel.INFO.value, - ) - - if callback == "lunary": - print_verbose("reaches lunary for logging!") - model = self.model - kwargs = self.model_call_details - - input = kwargs.get("messages", kwargs.get("input", None)) - - type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - result = kwargs["complete_streaming_response"] - - lunaryLogger.log_event( - type=type, - kwargs=kwargs, - event="end", - model=model, - input=input, - user_id=kwargs.get("user", None), - # user_props=self.model_call_details.get("user_props", None), - extra=kwargs.get("optional_params", {}), - response_obj=result, - start_time=start_time, - end_time=end_time, - run_id=self.litellm_call_id, - print_verbose=print_verbose, - ) - if callback == "helicone": - print_verbose("reaches helicone for logging!") - model = self.model - messages = self.model_call_details["input"] - heliconeLogger.log_success( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "datadog": - global dataDogLogger - verbose_logger.debug("reaches 
datadog for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches datadog for streaming logging!") - result = kwargs["complete_streaming_response"] - dataDogLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches prometheus for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "generic": - global genericAPILogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if genericAPILogger is None: - genericAPILogger = GenericAPILogger() - genericAPILogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "clickhouse": - global clickHouseLogger - verbose_logger.debug("reaches clickhouse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches clickhouse for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - if clickHouseLogger is None: - clickHouseLogger = ClickhouseLogger() - clickHouseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "greenscale": - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches greenscale for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - - greenscaleLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "cache" and litellm.cache is not None: - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - print_verbose("success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "complete_streaming_response" not in kwargs: - print_verbose( - f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" - ) - result = kwargs["complete_streaming_response"] - # only add to cache once we have a complete streaming response - litellm.cache.add_cache(result, **kwargs) - if callback == "athina": - deep_copy = {} - for k, v in self.model_call_details.items(): - deep_copy[k] = v - athinaLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "traceloop": - deep_copy = {} - for k, v in self.model_call_details.items(): - if k != "original_response": - deep_copy[k] = v - traceloopLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "s3": - global s3Logger - if s3Logger is None: - s3Logger = S3Logger() - if self.stream: - if "complete_streaming_response" in self.model_call_details: - print_verbose( - "S3Logger Logger: Got Stream Event - Completed Stream Response" - ) - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "S3Logger Logger: Got Stream Event - No complete stream response as yet" - ) - else: - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if ( - callback == "openmeter" - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", 
{}).get( - "atranscription", False - ) - == False - ): - global openMeterLogger - if openMeterLogger is None: - print_verbose("Instantiates openmeter client") - openMeterLogger = OpenMeterLogger() - if self.stream and complete_streaming_response is None: - openMeterLogger.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - openMeterLogger.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger class - if self.stream and complete_streaming_response is None: - callback.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - callback.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if ( - callable(callback) == True - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger functions - print_verbose( - f"success callbacks: Running Custom Callback Function" - ) - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - pass - - async def async_success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - print_verbose("Logging Details LiteLLM-Async Success Call") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit - ) - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream: - if result.choices[0].finish_reason is not None: # if it's the last chunk - self.streaming_chunks.append(result) - # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "Error occurred building stream chunk in success logging: {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.streaming_chunks.append(result) - if complete_streaming_response is not None: - print_verbose("Async success callbacks: Got a complete streaming response") - self.model_call_details["async_complete_streaming_response"] = ( - complete_streaming_response - ) - try: - if self.model_call_details.get("cache_hit", False) is True: - self.model_call_details["response_cost"] = 0.0 - else: - # check if base_model set on azure - base_model = _get_base_model_from_metadata( - model_call_details=self.model_call_details - ) - # base_model defaults to None if not set on model_info - self.model_call_details["response_cost"] = litellm.completion_cost( - completion_response=complete_streaming_response, - model=base_model, - ) - verbose_logger.debug( - f"Model={self.model}; cost={self.model_call_details['response_cost']}" - ) - except litellm.NotFoundError as e: - verbose_logger.error( - f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" - ) - self.model_call_details["response_cost"] = None - - if self.dynamic_async_success_callbacks is not None and isinstance( - self.dynamic_async_success_callbacks, list - ): - callbacks = self.dynamic_async_success_callbacks - ## keep the internal functions ## - for callback in litellm._async_success_callback: - callback_name = "" - if isinstance(callback, CustomLogger): - callback_name = callback.__class__.__name__ - if callable(callback): - callback_name = callback.__name__ - if "_PROXY_" in callback_name: - callbacks.append(callback) - else: - callbacks = litellm._async_success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - - for callback in callbacks: - # check if callback can run for this request - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - try: - if kwargs.get("no-log", False) == True: - print_verbose("no-log request, skipping logging") - continue - if callback == "cache" and litellm.cache is not None: - # set_cache once complete streaming response is built - print_verbose("async success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "async_complete_streaming_response" not in kwargs: - print_verbose( - f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" - ) - result = kwargs["async_complete_streaming_response"] - # only add to cache once we have a complete streaming response - if litellm.cache is not None and not isinstance( - litellm.cache.cache, S3Cache - ): - await litellm.cache.async_add_cache(result, **kwargs) - else: - litellm.cache.add_cache(result, **kwargs) - if callback == "openmeter": - global openMeterLogger - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if isinstance(callback, CustomLogger): # custom logger class - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if callable(callback): # custom logger functions - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - else: - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - if callback == "dynamodb": - global dynamoLogger - if dynamoLogger is None: - dynamoLogger = DyanmoDBLogger() - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - print_verbose( - "DynamoDB Logger: Got Stream Event - Completed Stream Response" - ) - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "DynamoDB Logger: Got Stream Event - No complete stream response as yet" - ) - else: - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - except Exception as e: - verbose_logger.error( - f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" - ) - pass - - def _failure_handler_helper_fn( - self, exception, traceback_exception, start_time=None, end_time=None - ): - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - - # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions - if not hasattr(self, "model_call_details"): - self.model_call_details = {} - - self.model_call_details["log_event_type"] = "failed_api_call" - self.model_call_details["exception"] = exception - self.model_call_details["traceback_exception"] = traceback_exception - self.model_call_details["end_time"] = end_time - self.model_call_details.setdefault("original_response", None) - return start_time, end_time - - def failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - print_verbose( - f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" - ) - try: - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - callbacks = [] # init this to empty incase it's not created - - if self.dynamic_failure_callbacks is not None and isinstance( - self.dynamic_failure_callbacks, list - ): - callbacks = self.dynamic_failure_callbacks - ## keep the internal functions ## - for callback in litellm.failure_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.failure_callback - - result = None # result sent to all loggers, init this to None incase it's not created - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - result = { - "model": self.model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - self.model, messages=self.messages - ), - "completion_tokens": 0, - }, - } - liteDebuggerClient.log_event( - model=self.model, - messages=self.messages, - end_user=self.model_call_details.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "lunary": - print_verbose("reaches lunary for logging error!") - - model = self.model - - input = self.model_call_details["input"] - - _type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - lunaryLogger.log_event( - type=_type, - event="error", - user_id=self.model_call_details.get("user", "default"), - model=model, - input=input, - error=traceback_exception, - run_id=self.litellm_call_id, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "sentry": - print_verbose("sending exception to sentry") - if capture_exception: - capture_exception(exception) - else: - print_verbose( - f"capture exception not initialized: {capture_exception}" - ) - if callable(callback): # custom logger functions - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - 
print_verbose=print_verbose, - callback_func=callback, - ) - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - ): # custom logger class - callback.log_failure_event( - start_time=start_time, - end_time=end_time, - response_obj=result, - kwargs=self.model_call_details, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for logging failure") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "traceloop": - traceloopLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = str(exception) - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for failure logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = exception - - logfireLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - level=LogfireLevel.ERROR.value, - print_verbose=print_verbose, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" - ) - pass - - async def async_failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - result = None # result sent to all loggers, init this to None incase it's not created - for callback in litellm._async_failure_callback: - try: - if isinstance(callback, CustomLogger): # custom logger class - await callback.async_log_failure_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) # type: ignore - if callable(callback): # custom logger functions - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" - ) def exception_logging( @@ -2848,6 +346,11 @@ def _init_custom_logger_compatible_class( def function_setup( original_function: str, rules_obj, start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. + ### NOTICES ### + if litellm.set_verbose is True: + verbose_logger.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." + ) try: global callback_list, add_breadcrumb, user_logger_fn, Logging function_id = kwargs["id"] if "id" in kwargs else None @@ -3033,7 +536,7 @@ def function_setup( ): messages = kwargs.get("input", "speech") stream = True if "stream" in kwargs and kwargs["stream"] == True else False - logging_obj = Logging( + logging_obj = litellm.litellm_core_utils.litellm_logging.Logging( model=model, messages=messages, stream=stream, @@ -3451,11 +954,6 @@ def client(original_function): logging_obj.failure_handler( e, traceback_exception, start_time, end_time ) # DO NOT MAKE THREADED - router retry fallback relies on this! 
- my_thread = threading.Thread( - target=handle_failure, - args=(e, traceback_exception, start_time, end_time, args, kwargs), - ) # don't interrupt execution of main thread - my_thread.start() if hasattr(e, "message"): if ( liteDebuggerClient and liteDebuggerClient.dashboard_url != None @@ -4323,229 +1821,6 @@ def token_counter( return num_tokens -def _cost_per_token_custom_pricing_helper( - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Optional[Tuple[float, float]]: - """Internal helper function for calculating cost, if custom pricing given""" - if custom_cost_per_token is None and custom_cost_per_second is None: - return None - - if custom_cost_per_token is not None: - input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens - output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens - return input_cost, output_cost - elif custom_cost_per_second is not None: - output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore - return 0, output_cost - - return None - - -def cost_per_token( - model: str = "", - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - custom_llm_provider=None, - region_name=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Tuple[float, float]: - """ - Calculates the cost per token for a given model, prompt tokens, and completion tokens. - - Parameters: - model (str): The name of the model to use. Default is "" - prompt_tokens (int): The number of tokens in the prompt. - completion_tokens (int): The number of tokens in the completion. - response_time (float): The amount of time, in milliseconds, it took the call to complete. - custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) - custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. - custom_cost_per_second: Optional[float]: the cost per second for the llm api call. - - Returns: - tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. - """ - if model is None: - raise Exception("Invalid arg. Model cannot be none.") - ## CUSTOM PRICING ## - response_cost = _cost_per_token_custom_pricing_helper( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - response_time_ms=response_time_ms, - custom_cost_per_second=custom_cost_per_second, - custom_cost_per_token=custom_cost_per_token, - ) - if response_cost is not None: - return response_cost[0], response_cost[1] - - # given - prompt_tokens_cost_usd_dollar: float = 0 - completion_tokens_cost_usd_dollar: float = 0 - model_cost_ref = litellm.model_cost - model_with_provider = model - if custom_llm_provider is not None: - model_with_provider = custom_llm_provider + "/" + model - if region_name is not None: - model_with_provider_and_region = ( - f"{custom_llm_provider}/{region_name}/{model}" - ) - if ( - model_with_provider_and_region in model_cost_ref - ): # use region based pricing, if it's available - model_with_provider = model_with_provider_and_region - - model_without_prefix = model - model_parts = model.split("/") - if len(model_parts) > 1: - model_without_prefix = model_parts[1] - else: - model_without_prefix = model - """ - Code block that formats model to lookup in litellm.model_cost - Option1. 
model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 - Option2. model = "openai/gpt-4" - model = provider/model - Option3. model = "anthropic.claude-3" - model = model - """ - if ( - model_with_provider in model_cost_ref - ): # Option 2. use model with provider, model = "openai/gpt-4" - model = model_with_provider - elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" - model = model - elif ( - model_without_prefix in model_cost_ref - ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" - model = model_without_prefix - - # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models - print_verbose(f"Looking up model={model} in model_cost_map") - if model in model_cost_ref: - print_verbose(f"Success: model={model} in model_cost_map") - print_verbose( - f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" - ) - if ( - model_cost_ref[model].get("input_cost_per_token", None) is not None - and model_cost_ref[model].get("output_cost_per_token", None) is not None - ): - ## COST PER TOKEN ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - elif ( - model_cost_ref[model].get("output_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = 0 - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_second"] - * response_time_ms - / 1000 - ) - elif ( - model_cost_ref[model].get("input_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 - ) - completion_tokens_cost_usd_dollar = 0.0 - print_verbose( - f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:gpt-3.5-turbo" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:davinci-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:davinci-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, 
completion_tokens_cost_usd_dollar - elif "ft:babbage-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:babbage-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_llms: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") - model = litellm.azure_llms[model] - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" - ) - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_embedding_models: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") - model = litellm.azure_embedding_models[model] - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - else: - # if model is not in model_prices_and_context_window.json. Raise an exception-let users know - error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. 
Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" - raise litellm.exceptions.NotFoundError( # type: ignore - message=error_str, - model=model, - response=httpx.Response( - status_code=404, - content=error_str, - request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - llm_provider="", - ) - - def supports_httpx_timeout(custom_llm_provider: str) -> bool: """ Helper function to know if a provider implementation supports httpx timeout @@ -7624,153 +4899,6 @@ def set_callbacks(callback_list, function_id=None): raise e -# NOTE: DEPRECATING this in favor of using failure_handler() in Logging: -def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs): - global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger - try: - # print_verbose(f"handle_failure args: {args}") - # print_verbose(f"handle_failure kwargs: {kwargs}") - - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - - additional_details["Event_Name"] = additional_details.pop( - "failed_event_name", "litellm.failed_query" - ) - print_verbose(f"self.failure_callback: {litellm.failure_callback}") - for callback in litellm.failure_callback: - try: - if callback == "slack": - slack_msg = "" - if len(kwargs) > 0: - for key in kwargs: - slack_msg += f"{key}: {kwargs[key]}\n" - if len(args) > 0: - for i, arg in enumerate(args): - slack_msg += f"LiteLLM_Args_{str(i)}: {arg}" - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Traceback: {traceback_exception}" - truncated_slack_msg = textwrap.shorten( - slack_msg, width=512, placeholder="..." 
- ) - slack_app.client.chat_postMessage( - channel=alerts_channel, text=truncated_slack_msg - ) - elif callback == "sentry": - capture_exception(exception) - elif callback == "posthog": - print_verbose( - f"inside posthog, additional_details: {len(additional_details.keys())}" - ) - ph_obj = {} - if len(kwargs) > 0: - ph_obj = kwargs - if len(args) > 0: - for i, arg in enumerate(args): - ph_obj["litellm_args_" + str(i)] = arg - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["Event_Name"] - print_verbose(f"ph_obj: {ph_obj}") - print_verbose(f"PostHog Event Name: {event_name}") - if "user_id" in additional_details: - posthog.capture( - additional_details["user_id"], event_name, ph_obj - ) - else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python - unique_id = str(uuid.uuid4()) - posthog.capture(unique_id, event_name) - print_verbose(f"successfully logged to PostHog!") - elif callback == "berrispend": - print_verbose("reaches berrispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - berrispendLogger.log_event( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "aispend": - print_verbose("reaches aispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - aispendLogger.log_event( - model=model, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "supabase": - print_verbose("reaches supabase for logging!") - print_verbose(f"supabaseClient: {supabaseClient}") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - supabaseClient.log_event( - model=model, - messages=messages, - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=kwargs["litellm_call_id"], - print_verbose=print_verbose, - ) - except: - print_verbose( - f"Error Occurred while logging failure: {traceback.format_exc()}" - ) - pass - - if failure_handler and callable(failure_handler): - call_details = { - "exception": exception, - "additional_details": additional_details, - } - failure_handler(call_details) - pass - except Exception as e: - # LOGGING - exception_logging(logger_fn=user_logger_fn, exception=e) - pass - - async def convert_to_streaming_response_async(response_object: Optional[dict] = None): """ Asynchronously converts a response object to a streaming response.
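# A minimal usage sketch for the cost_per_token API whose utils.py copy is
# removed above; importing it from the top-level litellm package is assumed to
# remain valid after the refactor. Standard models are priced from
# litellm.model_cost, while custom_cost_per_token / custom_cost_per_second
# short-circuit that lookup, as the removed helper shows. Model names and rates
# below are illustrative only.
from litellm import cost_per_token

# Priced from the model_cost map via the Option 1/2/3 key lookup shown above.
prompt_cost, completion_cost = cost_per_token(
    model="gpt-4", prompt_tokens=1000, completion_tokens=200
)

# Custom per-token pricing bypasses the map entirely.
prompt_cost, completion_cost = cost_per_token(
    model="my-finetune",  # hypothetical deployment name
    prompt_tokens=1000,
    completion_tokens=200,
    custom_cost_per_token={
        "input_cost_per_token": 1e-6,
        "output_cost_per_token": 2e-6,
    },
)

# Per-second pricing: prompt cost is 0, completion cost = rate * response_time_ms / 1000.
prompt_cost, completion_cost = cost_per_token(
    model="my-audio-model",  # hypothetical
    response_time_ms=4500,
    custom_cost_per_second=0.0004,
)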
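# A standalone sketch of the model-key normalization that cost_per_token (above)
# performs before consulting litellm.model_cost: prefer the provider/region-
# qualified key, then the provider-qualified key, then the raw model name, then
# the name with its provider prefix stripped. This mirrors the removed
# Option 1/2/3 logic for illustration only; resolve_cost_key is not a litellm
# function.
from typing import Optional


def resolve_cost_key(
    model: str,
    model_cost: dict,
    custom_llm_provider: Optional[str] = None,
    region_name: Optional[str] = None,
) -> Optional[str]:
    candidates = []
    if custom_llm_provider is not None:
        if region_name is not None:
            # region-based pricing is the most accurate key when it exists
            candidates.append(f"{custom_llm_provider}/{region_name}/{model}")
        candidates.append(f"{custom_llm_provider}/{model}")
    candidates.append(model)
    if "/" in model:
        candidates.append(model.split("/", 1)[1])  # "bedrock/anthropic.claude-3" -> "anthropic.claude-3"
    for key in candidates:
        if key in model_cost:
            return key
    return None  # unknown model; cost_per_token raises NotFoundError in this case


# e.g. resolve_cost_key("anthropic.claude-instant-v1", litellm.model_cost,
#                       custom_llm_provider="bedrock", region_name="ap-northeast-1")
# returns "bedrock/ap-northeast-1/anthropic.claude-instant-v1" when that key is priced.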