diff --git a/litellm/utils.py b/litellm/utils.py
index 0cf4f9f16..045506b22 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5482,17 +5482,21 @@ def get_optional_params(
             optional_params["random_seed"] = seed
         if stop is not None:
             optional_params["stop_sequences"] = stop
-
+
         # WatsonX-only parameters
         extra_body = {}
         if "decoding_method" in passed_params:
             extra_body["decoding_method"] = passed_params.pop("decoding_method")
-        if "min_tokens" in passed_params or "min_new_tokens" in passed_params:
-            extra_body["min_new_tokens"] = passed_params.pop("min_tokens", passed_params.pop("min_new_tokens"))
+        if "min_tokens" in passed_params or "min_new_tokens" in passed_params:
+            extra_body["min_new_tokens"] = passed_params.pop(
+                "min_tokens", passed_params.pop("min_new_tokens")
+            )
         if "top_k" in passed_params:
             extra_body["top_k"] = passed_params.pop("top_k")
         if "truncate_input_tokens" in passed_params:
-            extra_body["truncate_input_tokens"] = passed_params.pop("truncate_input_tokens")
+            extra_body["truncate_input_tokens"] = passed_params.pop(
+                "truncate_input_tokens"
+            )
         if "length_penalty" in passed_params:
             extra_body["length_penalty"] = passed_params.pop("length_penalty")
         if "time_limit" in passed_params:
@@ -5500,7 +5504,7 @@ def get_optional_params(
         if "return_options" in passed_params:
             extra_body["return_options"] = passed_params.pop("return_options")
         optional_params["extra_body"] = (
-            extra_body # openai client supports `extra_body` param
+            extra_body  # openai client supports `extra_body` param
         )
     else:  # assume passing in params for openai/azure openai
         print_verbose(
@@ -9829,7 +9833,7 @@ class CustomStreamWrapper:
                 "is_finished": chunk["is_finished"],
                 "finish_reason": finish_reason,
             }
-
+
     def handle_watsonx_stream(self, chunk):
         try:
             if isinstance(chunk, dict):
@@ -9837,19 +9841,21 @@ class CustomStreamWrapper:
                 parsed_response = chunk
             elif isinstance(chunk, (str, bytes)):
                 if isinstance(chunk, bytes):
                     chunk = chunk.decode("utf-8")
-                if 'generated_text' in chunk:
-                    response = chunk.replace('data: ', '').strip()
+                if "generated_text" in chunk:
+                    response = chunk.replace("data: ", "").strip()
                     parsed_response = json.loads(response)
                 else:
                     return {"text": "", "is_finished": False}
             else:
                 print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
-                raise ValueError(f"Unable to parse response. Original response: {chunk}")
+                raise ValueError(
+                    f"Unable to parse response. Original response: {chunk}"
+                )
             results = parsed_response.get("results", [])
             if len(results) > 0:
                 text = results[0].get("generated_text", "")
                 finish_reason = results[0].get("stop_reason")
-                is_finished = finish_reason != 'not_finished'
+                is_finished = finish_reason != "not_finished"
                 return {
                     "text": text,
                     "is_finished": is_finished,
@@ -10119,16 +10125,6 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider == "watsonx":
                 response_obj = self.handle_watsonx_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
-                print_verbose(f"completion obj content: {completion_obj['content']}")
-                if response_obj.get("prompt_tokens") is not None:
-                    prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
-                    model_response.usage.prompt_tokens = (prompt_token_count+response_obj["prompt_tokens"])
-                if response_obj.get("completion_tokens") is not None:
-                    model_response.usage.completion_tokens = response_obj["completion_tokens"]
-                model_response.usage.total_tokens = (
-                    getattr(model_response.usage, "prompt_tokens", 0)
-                    + getattr(model_response.usage, "completion_tokens", 0)
-                )
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":