diff --git a/litellm/llms/watsonx.py b/litellm/llms/watsonx.py
index 5649b714a..a3d315557 100644
--- a/litellm/llms/watsonx.py
+++ b/litellm/llms/watsonx.py
@@ -25,7 +25,13 @@ import requests  # type: ignore
 
 import litellm
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
-from litellm.utils import ModelResponse, Usage, get_secret
+from litellm.utils import (
+    EmbeddingResponse,
+    ModelResponse,
+    Usage,
+    get_secret,
+    map_finish_reason,
+)
 
 from .base import BaseLLM
 from .prompt_templates import factory as ptf
@@ -414,14 +420,16 @@ class IBMWatsonXAI(BaseLLM):
         generated_text = json_resp["results"][0]["generated_text"]
         prompt_tokens = json_resp["results"][0]["input_token_count"]
         completion_tokens = json_resp["results"][0]["generated_token_count"]
-        model_response["choices"][0]["message"]["content"] = generated_text
-        model_response["finish_reason"] = json_resp["results"][0]["stop_reason"]
+        model_response.choices[0].message.content = generated_text  # type: ignore
+        model_response.choices[0].finish_reason = map_finish_reason(
+            json_resp["results"][0]["stop_reason"]
+        )
         if json_resp.get("created_at"):
-            model_response["created"] = datetime.fromisoformat(
-                json_resp["created_at"]
-            ).timestamp()
+            model_response.created = int(
+                datetime.fromisoformat(json_resp["created_at"]).timestamp()
+            )
         else:
-            model_response["created"] = int(time.time())
+            model_response.created = int(time.time())
         usage = Usage(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
@@ -463,7 +471,7 @@ class IBMWatsonXAI(BaseLLM):
         prompt = convert_messages_to_prompt(
             model, messages, provider, custom_prompt_dict
         )
-        model_response["model"] = model
+        model_response.model = model
 
         def process_stream_response(
             stream_resp: Union[Iterator[str], AsyncIterator],
@@ -551,10 +559,10 @@ class IBMWatsonXAI(BaseLLM):
             raise WatsonXAIError(status_code=500, message=str(e))
 
     def _process_embedding_response(
-        self, json_resp: dict, model_response: Union[ModelResponse, None] = None
-    ) -> ModelResponse:
+        self, json_resp: dict, model_response: Optional[EmbeddingResponse] = None
+    ) -> EmbeddingResponse:
         if model_response is None:
-            model_response = ModelResponse(model=json_resp.get("model_id", None))
+            model_response = EmbeddingResponse(model=json_resp.get("model_id", None))
         results = json_resp.get("results", [])
         embedding_response = []
         for idx, result in enumerate(results):
@@ -565,8 +573,8 @@ class IBMWatsonXAI(BaseLLM):
             embedding_response.append(
                 {
                     "object": "embedding",
                     "index": idx,
                     "embedding": result["embedding"],
                 }
             )
-        model_response["object"] = "list"
-        model_response["data"] = embedding_response
+        model_response.object = "list"
+        model_response.data = embedding_response
         input_tokens = json_resp.get("input_token_count", 0)
         setattr(
             model_response,
@@ -635,12 +643,12 @@ class IBMWatsonXAI(BaseLLM):
         }
         request_manager = RequestManager(logging_obj)
 
-        def handle_embedding(request_params: dict) -> ModelResponse:
+        def handle_embedding(request_params: dict) -> EmbeddingResponse:
             with request_manager.request(request_params, input=input) as resp:
                 json_resp = resp.json()
             return self._process_embedding_response(json_resp, model_response)
 
-        async def handle_aembedding(request_params: dict) -> ModelResponse:
+        async def handle_aembedding(request_params: dict) -> EmbeddingResponse:
             async with request_manager.async_request(
                 request_params, input=input
             ) as resp:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 4ae88a745..2e0ae425c 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -624,10 +624,6 @@ class ModelResponse(OpenAIObject):
         # Allow dictionary-style access to attributes
         return getattr(self, key)
 
-    def __setitem__(self, key, value):
-        # Allow dictionary-style assignment of attributes
-        setattr(self, key, value)
-
     def json(self, **kwargs):
         try:
             return self.model_dump()  # noqa
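
Note on the litellm/types/utils.py hunk: dropping __setitem__ from ModelResponse is what forces every dict-style assignment in watsonx.py above to become an attribute assignment; dict-style reads keep working because __getitem__ survives. A minimal sketch of the resulting behavior, assuming only what this patch shows (not a test from the repo):

    # Sketch of ModelResponse behavior after this patch.
    from litellm.utils import ModelResponse

    resp = ModelResponse()
    resp.created = 1700000000   # attribute assignment: still supported
    print(resp["created"])      # dict-style read: still works via __getitem__

    try:
        resp["created"] = 0     # dict-style write: removed by this patch
    except TypeError as err:
        # Python raises TypeError when an object defines no __setitem__.
        print(f"rejected as expected: {err}")

Separately, routing stop_reason through map_finish_reason normalizes watsonx-specific values into OpenAI-style finish reasons (e.g. "max_tokens" becomes "length") before they are assigned to choices[0].finish_reason.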