diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 13a9e4bdc..9bd5f90be 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -737,8 +737,8 @@ def response_cost_calculator( ) return None except Exception as e: - verbose_logger.error( - "litellm.cost_calculator.py::response_cost_calculator - Exception occurred - {}/n{}".format( + verbose_logger.warning( + "litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}\n{}".format( str(e), traceback.format_exc() ) ) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index e97be428a..a94e151f4 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1499,6 +1499,7 @@ class Logging: self.model_call_details["traceback_exception"] = traceback_exception self.model_call_details["end_time"] = end_time self.model_call_details.setdefault("original_response", None) + self.model_call_details["response_cost"] = 0 return start_time, end_time def failure_handler( diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 18a9966be..b3888548d 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,33 +1,4 @@ model_list: - - model_name: azure-ai-mistral + - model_name: ollama-mistral litellm_params: - api_base: os.environ/AZURE_AI_MISTRAL_API_BASE - api_key: os.environ/AZURE_AI_MISTRAL_API_KEY - model: azure_ai/Mistral-large-nmefg - input_cost_per_token: 0.00001 - output_cost_per_token: 0.000004 - - model_name: azure-ai-phi - litellm_params: - api_base: os.environ/AZURE_AI_PHI_API_BASE - api_key: os.environ/AZURE_AI_PHI_API_KEY - model: azure_ai/Phi-3-medium-128k-instruct-fpmvj - - model_name: dbrx - litellm_params: - model: databricks/databricks-dbrx-instruct - api_key: os.environ/DATABRICKS_API_KEY - api_base: os.environ/DATABRICKS_API_BASE - 
input_cost_per_token: 0.00000075 - output_cost_per_token: 0.00000225 - - - -general_settings: - master_key: sk-1234 - pass_through_endpoints: - - path: "/v1/rerank" - target: "https://api.cohere.com/v1/rerank" - auth: true # 👈 Key change to use LiteLLM Auth / Keys - headers: - Authorization: "bearer os.environ/COHERE_API_KEY" - content-type: application/json - accept: application/json \ No newline at end of file + model: ollama/mistral \ No newline at end of file diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index d3447721b..1daf1531c 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -42,6 +42,14 @@ class CustomLoggingHandler(CustomLogger): print(f"response_cost: {self.response_cost} ") + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + print("Reaches log failure event!") + self.response_cost = kwargs["response_cost"] + + async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): + print("Reaches async log failure event!") + self.response_cost = kwargs["response_cost"] + @pytest.mark.parametrize("sync_mode", [True, False]) @pytest.mark.asyncio @@ -74,6 +82,41 @@ async def test_custom_pricing(sync_mode): assert new_handler.response_cost == 0 +@pytest.mark.parametrize( + "sync_mode", + [True, False], +) +@pytest.mark.asyncio +async def test_failure_completion_cost(sync_mode): + new_handler = CustomLoggingHandler() + litellm.callbacks = [new_handler] + if sync_mode: + try: + response = litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey!"}], + mock_response=Exception("this should trigger an error"), + ) + except Exception: + pass + time.sleep(5) + else: + try: + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey!"}], + mock_response=Exception("this should trigger an error"), + ) + except Exception: + pass + await asyncio.sleep(5) + 
+ print(f"new_handler.response_cost: {new_handler.response_cost}") + assert new_handler.response_cost is not None + + assert new_handler.response_cost == 0 + + def test_custom_pricing_as_completion_cost_param(): from litellm import Choices, Message, ModelResponse from litellm.utils import Usage diff --git a/litellm/utils.py b/litellm/utils.py index aaa8f7a4a..a73873394 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -694,7 +694,7 @@ def client(original_function): kwargs["litellm_call_id"] = str(uuid.uuid4()) try: model = args[0] if len(args) > 0 else kwargs["model"] - except: + except Exception: model = None if ( call_type != CallTypes.image_generation.value