diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index 0a155828b..2b3edfadb 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -202,7 +202,7 @@ print(response) -## Save Model-specific params (API Base, API Keys, Temperature, Max Tokens, Seed, Organization, Headers etc.) +## Save Model-specific params (API Base, Keys, Temperature, Max Tokens, Organization, Headers etc.) You can use the config to save model-specific information like api_base, api_key, temperature, max_tokens, etc. [**All input params**](https://docs.litellm.ai/docs/completion/input#input-params-1) @@ -244,6 +244,45 @@ $ litellm --config /path/to/config.yaml ``` +## Load Balancing + +Use this to call multiple instances of the same model and configure things like [routing strategy](../routing.md#advanced). + +```yaml +router_settings: + routing_strategy: "latency-based-routing" # routes to the fastest deployment in the group + +model_list: + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8001 + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8002 + - model_name: zephyr-beta + litellm_params: + model: huggingface/HuggingFaceH4/zephyr-7b-beta + api_base: http://0.0.0.0:8003 + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo + api_key: + - model_name: gpt-3.5-turbo-16k + litellm_params: + model: gpt-3.5-turbo-16k + api_key: + +litellm_settings: + num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta) + request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout + fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries + context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error + allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. +``` + + ## Set Azure `base_model` for cost tracking **Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking @@ -512,30 +551,6 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \ ``` -## Router Settings - -Use this to configure things like routing strategy. 
- -```yaml -router_settings: - routing_strategy: "least-busy" - -model_list: # will route requests to the least busy ollama model - - model_name: ollama-models - litellm_params: - model: "ollama/mistral" - api_base: "http://127.0.0.1:8001" - - model_name: ollama-models - litellm_params: - model: "ollama/codellama" - api_base: "http://127.0.0.1:8002" - - model_name: ollama-models - litellm_params: - model: "ollama/llama2" - api_base: "http://127.0.0.1:8003" -``` - - ## Configure DB Pool Limits + Connection Timeouts ```yaml diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index 40242f5c0..0556ceebb 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -124,7 +124,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac start_time, end_time, ) - print_verbose(f"Custom Logger - final response object: {response_obj}") except: # traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") @@ -142,7 +141,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac start_time, end_time, ) - print_verbose(f"Custom Logger - final response object: {response_obj}") except: # traceback.print_exc() print_verbose(f"Custom Logger Error - {traceback.format_exc()}") diff --git a/litellm/llms/aleph_alpha.py b/litellm/llms/aleph_alpha.py index 7168e7369..3c1bd5dde 100644 --- a/litellm/llms/aleph_alpha.py +++ b/litellm/llms/aleph_alpha.py @@ -77,9 +77,9 @@ class AlephAlphaConfig: - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores. """ - maximum_tokens: Optional[ - int - ] = litellm.max_tokens # aleph alpha requires max tokens + maximum_tokens: Optional[int] = ( + litellm.max_tokens + ) # aleph alpha requires max tokens minimum_tokens: Optional[int] = None echo: Optional[bool] = None temperature: Optional[int] = None @@ -285,7 +285,10 @@ def completion( ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
prompt_tokens = len(encoding.encode(prompt)) completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"]["content"]) + encoding.encode( + model_response["choices"][0]["message"]["content"], + disallowed_special=(), + ) ) model_response["created"] = int(time.time()) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index dec87a61c..c78a71dba 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -575,6 +575,7 @@ def anthropic_messages_pt(messages: list): if messages[i]["role"] == "assistant": last_assistant_message_idx = i + new_messages.append(messages[-1]) if last_assistant_message_idx is not None: new_messages[last_assistant_message_idx]["content"] = new_messages[ last_assistant_message_idx diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 47b8c1535..628f55852 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1067,20 +1067,22 @@ async def update_database( ) data_list.append(existing_spend_obj) - # Update the cost column for the given user id - if prisma_client is not None: - await prisma_client.update_data( - data_list=data_list, - query_type="update_many", - table_name="user", - ) - elif custom_db_client is not None and user_id is not None: + if custom_db_client is not None and user_id is not None: new_spend = data_list[0].spend await custom_db_client.update_data( key=user_id, value={"spend": new_spend}, table_name="user" ) + # Update the cost column for the given user id + if prisma_client is not None: + await prisma_client.update_data( + data_list=data_list, + query_type="update_many", + table_name="user", + ) except Exception as e: - verbose_proxy_logger.info(f"Update User DB call failed to execute") + verbose_proxy_logger.info( + f"Update User DB call failed to execute {str(e)}" + ) ### UPDATE KEY SPEND ### async def _update_key_db(): @@ -1215,7 +1217,9 @@ async def update_database( await custom_db_client.insert_data(payload, table_name="spend") except Exception as e: - verbose_proxy_logger.info(f"Update Spend Logs DB failed to execute") + verbose_proxy_logger.info( + f"Update Spend Logs DB failed to execute - {str(e)}" + ) ### UPDATE KEY SPEND ### async def _update_team_db(): @@ -1286,7 +1290,9 @@ async def update_database( valid_token.spend = new_spend user_api_key_cache.set_cache(key=token, value=valid_token) except Exception as e: - verbose_proxy_logger.info(f"Update Team DB failed to execute") + verbose_proxy_logger.info( + f"Update Team DB failed to execute - {str(e)}" + ) asyncio.create_task(_update_user_db()) asyncio.create_task(_update_key_db()) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 959f2b714..9e3a374ec 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -64,7 +64,7 @@ class ProxyLogging: litellm.callbacks.append(self.max_parallel_request_limiter) litellm.callbacks.append(self.max_budget_limiter) litellm.callbacks.append(self.cache_control_check) - # litellm.callbacks.append(self.response_taking_too_long_callback) + litellm.success_callback.append(self.response_taking_too_long_callback) for callback in litellm.callbacks: if callback not in litellm.input_callback: litellm.input_callback.append(callback) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 57b0e436f..f5e145769 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -82,6 +82,23 @@ def test_completion_claude(): # 
test_completion_claude() +def test_completion_claude_3_empty_response(): + messages = [ + { + "role": "system", + "content": "You are 2twNLGfqk4GMOn3ffp4p.", + }, + {"role": "user", "content": "Hi gm!"}, + {"role": "assistant", "content": "Good morning! How are you doing today?"}, + { + "role": "user", + "content": "I was hoping we could chat a bit", + }, + ] + response = litellm.completion(model="claude-3-opus-20240229", messages=messages) + print(response) + + def test_completion_claude_3(): litellm.set_verbose = True messages = [{"role": "user", "content": "Hello, world"}] diff --git a/litellm/utils.py b/litellm/utils.py index d4d85cad1..68dc137af 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -225,9 +225,28 @@ class ChatCompletionDeltaToolCall(OpenAIObject): class ChatCompletionMessageToolCall(OpenAIObject): - id: str - function: Function - type: str + def __init__( + self, + function: Union[Dict, Function], + id: Optional[str] = None, + type: Optional[str] = None, + **params, + ): + super(ChatCompletionMessageToolCall, self).__init__(**params) + if isinstance(function, Dict): + self.function = Function(**function) + else: + self.function = function + + if id is not None: + self.id = id + else: + self.id = f"{uuid.uuid4()}" + + if type is not None: + self.type = type + else: + self.type = "function" class Message(OpenAIObject): @@ -772,10 +791,10 @@ class ImageResponse(OpenAIObject): ############################################################ -def print_verbose(print_statement): +def print_verbose(print_statement, logger_only: bool = False): try: verbose_logger.debug(print_statement) - if litellm.set_verbose: + if litellm.set_verbose == True and logger_only == False: print(print_statement) # noqa except: pass @@ -1738,9 +1757,10 @@ class Logging: end_time=end_time, ) if callable(callback): # custom logger functions - print_verbose( - f"Making async function logging call for {callback}, result={result} - {self.model_call_details}" - ) + # print_verbose( + # f"Making async function logging call for {callback}, result={result} - {self.model_call_details}", + # logger_only=True, + # ) if self.stream: if ( "async_complete_streaming_response" @@ -6231,7 +6251,7 @@ def convert_to_model_response_object( return model_response_object except Exception as e: - raise Exception(f"Invalid response object {e}") + raise Exception(f"Invalid response object {traceback.format_exc()}") def acreate(*args, **kwargs): ## Thin client to handle the acreate langchain call diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index b65cefe79..64183f216 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -40,8 +40,8 @@ litellm_settings: budget_duration: 30d general_settings: master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234) - proxy_budget_rescheduler_min_time: 3 - proxy_budget_rescheduler_max_time: 6 + proxy_budget_rescheduler_min_time: 10 + proxy_budget_rescheduler_max_time: 12 # database_url: "postgresql://:@:/" # [OPTIONAL] use for token-based auth to proxy environment_variables: diff --git a/pyproject.toml b/pyproject.toml index 8de9a9f10..6d42edd23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.29.2" +version = "1.29.4" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -74,7 +74,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" 
[tool.commitizen] -version = "1.29.2" +version = "1.29.4" version_files = [ "pyproject.toml:^version" ] diff --git a/tests/test_keys.py b/tests/test_keys.py index db21a2176..5a7b79e1c 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -469,7 +469,7 @@ async def test_key_with_budgets(): break except: i + 1 - await asyncio.sleep(5) + await asyncio.sleep(10) assert reset_at_init_value != reset_at_new_value
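
For reference, a minimal client-side sketch (not part of this PR) of how the new "Load Balancing" section added to docs/my-website/docs/proxy/configs.md would be exercised: assuming the proxy has been started with `litellm --config /path/to/config.yaml` and is reachable at `http://0.0.0.0:8000` (the address used elsewhere in that doc), any OpenAI-compatible client can call the `zephyr-beta` model group and the router will spread requests across the three configured deployments. The API key below is a placeholder.

```python
# Sketch only: call the load-balanced "zephyr-beta" group through the LiteLLM proxy
# started from the config shown in the docs hunk above.
# Assumes the proxy is listening on http://0.0.0.0:8000.
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # proxy master_key or a virtual key; placeholder value
    base_url="http://0.0.0.0:8000",  # LiteLLM proxy endpoint (assumed default address)
)

response = client.chat.completions.create(
    model="zephyr-beta",  # router selects a deployment from this group (latency-based routing)
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response)
```

Per the `litellm_settings` added in the same hunk, a failing call is retried `num_retries` times and then falls back to `gpt-3.5-turbo` (or to `gpt-3.5-turbo-16k` on a context-window error) before an error is returned to the client.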