diff --git a/litellm/__init__.py b/litellm/__init__.py
index 03114b78e..b0099202e 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -269,6 +269,7 @@ from .exceptions import (
     ServiceUnavailableError,
     OpenAIError,
     ContextWindowExceededError,
+    BudgetExceededError
 )
 from .budget_manager import BudgetManager
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 9d7f4f02b..8b54c3254 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 3b91b0fb3..fcec8b4e6 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index d323a9eae..11ad024d4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_litellm_max_budget.py b/litellm/tests/test_litellm_max_budget.py
index 15ca48efe..0e933c604 100644
--- a/litellm/tests/test_litellm_max_budget.py
+++ b/litellm/tests/test_litellm_max_budget.py
@@ -1,6 +1,6 @@
-#### What this tests ####
-# This tests calling litellm.max_budget by making back-to-back gpt-4 calls
-# commenting out this test for circle ci, as it causes other tests to fail, since litellm.max_budget would impact other litellm imports
+# #### What this tests ####
+# # This tests calling litellm.max_budget by making back-to-back gpt-4 calls
+# # commenting out this test for circle ci, as it causes other tests to fail, since litellm.max_budget would impact other litellm imports
 # import sys, os, json
 # import traceback
 # import pytest
@@ -9,13 +9,23 @@
 #     0, os.path.abspath("../..")
 # )  # Adds the parent directory to the system path
 # import litellm
-# litellm.set_verbose = True
-# from litellm import completion
+# # litellm.set_verbose = True
+# from litellm import completion, BudgetExceededError
 
-# litellm.max_budget = 0.001 # sets a max budget of $0.001
+# def test_max_budget():
+#     try:
+#         litellm.max_budget = 0.001 # sets a max budget of $0.001
 
+#         messages = [{"role": "user", "content": "Hey, how's it going"}]
+#         response = completion(model="gpt-4", messages=messages, stream=True)
+#         for chunk in response:
+#             continue
+#         print(litellm._current_cost)
+#         completion(model="gpt-4", messages=messages, stream=True)
+#         litellm.max_budget = float('inf')
+#     except BudgetExceededError as e:
+#         pass
+#     except Exception as e:
+#         pytest.fail(f"An error occured: {str(e)}")
 
-# messages = [{"role": "user", "content": "Hey, how's it going"}]
-# completion(model="gpt-4", messages=messages)
-# completion(model="gpt-4", messages=messages)
-# print(litellm._current_cost)
diff --git a/litellm/utils.py b/litellm/utils.py
index 69f53a877..7df4330c4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -180,6 +180,10 @@ class Logging:
         # Log the exact input to the LLM API
         print_verbose(f"Logging Details Pre-API Call for call id {self.litellm_call_id}")
         try:
+            if start_time is None:
+                start_time = self.start_time
+            if end_time is None:
+                end_time = datetime.datetime.now()
             # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}")
             self.model_call_details["input"] = input
             self.model_call_details["api_key"] = api_key
@@ -202,6 +206,11 @@ class Logging:
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
             )
 
+        if litellm.max_budget and self.stream:
+            time_diff = (end_time - start_time).total_seconds()
+            float_diff = float(time_diff)
+            litellm._current_cost += litellm.completion_cost(model=self.model, prompt="".join(message["content"] for message in self.messages), completion="", total_time=float_diff)
+
         # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
         for callback in litellm.input_callback:
             try:
@@ -314,6 +323,12 @@ class Logging:
             if end_time is None:
                 end_time = datetime.datetime.now()
             print_verbose(f"success callbacks: {litellm.success_callback}")
+
+            if litellm.max_budget and self.stream:
+                time_diff = (end_time - start_time).total_seconds()
+                float_diff = float(time_diff)
+                litellm._current_cost += litellm.completion_cost(model=self.model, prompt="", completion=result["content"], total_time=float_diff)
+
             for callback in litellm.success_callback:
                 try:
                     if callback == "lite_debugger":
@@ -574,10 +589,6 @@ def client(original_function):
 
             if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                 litellm.cache.add_cache(result, *args, **kwargs)
-            # [OPTIONAL] UPDATE BUDGET
-            if litellm.max_budget:
-                litellm._current_cost += litellm.completion_cost(completion_response=result)
-
             # [OPTIONAL] Return LiteLLM call_id
             if litellm.use_client == True:
                 result['litellm_call_id'] = litellm_call_id
@@ -2383,7 +2394,6 @@ class CustomStreamWrapper:
 
     def handle_cohere_chunk(self, chunk):
         chunk = chunk.decode("utf-8")
-        print(f"cohere chunk: {chunk}")
         data_json = json.loads(chunk)
         try:
             print(f"data json: {data_json}")
@@ -2474,7 +2484,8 @@ class CustomStreamWrapper:
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
-                return chunk # open ai returns finish_reason, we should just return the openai chunk
+                completion_obj["content"] = chunk["choices"][0]["delta"]["content"]
+                # return chunk # open ai returns finish_reason, we should just return the openai chunk
                 #completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
             # LOGGING
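
Taken together, this patch exports BudgetExceededError at the package level, moves budget accounting for streaming calls into the Logging pre-call and success handlers (costing the prompt side and the completion side separately, and dropping the old post-call budget update in client()), and makes CustomStreamWrapper yield delta content for OpenAI/Azure chunks instead of returning the raw chunk. A minimal usage sketch, mirroring the commented-out test above and based only on the APIs touched in this diff (an OpenAI key is assumed to be configured, and where exactly BudgetExceededError is raised is an assumption):

import litellm
from litellm import completion, BudgetExceededError

litellm.max_budget = 0.001  # cap cumulative spend at $0.001 across calls

messages = [{"role": "user", "content": "Hey, how's it going"}]
try:
    # streaming call: the Logging hooks accumulate cost into litellm._current_cost
    for chunk in completion(model="gpt-4", messages=messages, stream=True):
        pass
    # a second call is expected to trip the budget once _current_cost exceeds max_budget
    completion(model="gpt-4", messages=messages, stream=True)
except BudgetExceededError:
    print(f"budget exceeded, current cost: {litellm._current_cost}")
finally:
    litellm.max_budget = float("inf")  # reset so other imports/tests are not affected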