diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index f687fe5ec..dabe860d7 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 57f2e449a..6b42eec17 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 0062150af..fbebc9cd5 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 43fcafbb7..9edf423e0 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -17,7 +17,8 @@ from litellm.utils import (
     CustomStreamWrapper,
     read_config_args,
     completion_with_fallbacks,
-    get_llm_provider
+    get_llm_provider,
+    mock_completion_streaming_obj
 )
 from .llms import anthropic
 from .llms import together_ai
@@ -72,30 +73,22 @@ async def acompletion(*args, **kwargs):
     else:
         return response
 
-## Use this in your testing pipeline, if you need to mock an LLM response
 def mock_completion(model: str, messages: List, stream: bool = False, mock_response: str = "This is a mock request", **kwargs):
     try:
-        model_response = ModelResponse()
-        if stream: # return a generator object, iterate through the text in chunks of 3 char / chunk
-            for i in range(0, len(mock_response), 3):
-                completion_obj = {"role": "assistant", "content": mock_response[i: i+3]}
-                yield {
-                    "choices":
-                        [
-                            {
-                                "delta": completion_obj,
-                                "finish_reason": None
-                            },
-                        ]
-                    }
-        else:
-            ## RESPONSE OBJECT
-            completion_response = "This is a mock request"
-            model_response["choices"][0]["message"]["content"] = completion_response
-            model_response["created"] = time.time()
-            model_response["model"] = "MockResponse"
-            return model_response
+        model_response = ModelResponse(stream=stream)
+        if stream is True:
+            # don't try to access stream object,
+            response = mock_completion_streaming_obj(model_response, mock_response=mock_response, model=model)
+            return response
+
+        completion_response = "This is a mock request"
+        model_response["choices"][0]["message"]["content"] = completion_response
+        model_response["created"] = time.time()
+        model_response["model"] = model
+        return model_response
+
     except:
+        traceback.print_exc()
         raise Exception("Mock completion response failed")
 
 @client
diff --git a/litellm/tests/test_mock_request.py b/litellm/tests/test_mock_request.py
index 33a1ac8ba..1997d454d 100644
--- a/litellm/tests/test_mock_request.py
+++ b/litellm/tests/test_mock_request.py
@@ -13,11 +13,13 @@ def test_mock_request():
     try:
         model = "gpt-3.5-turbo"
         messages = [{"role": "user", "content": "Hey, I'm a mock request"}]
-        response = litellm.mock_completion(model=model, messages=messages)
+        response = litellm.mock_completion(model=model, messages=messages, stream=False)
         print(response)
+        print(type(response))
     except:
         traceback.print_exc()
 
+# test_mock_request()
 def test_streaming_mock_request():
     try:
         model = "gpt-3.5-turbo"
diff --git a/litellm/utils.py b/litellm/utils.py
index ff7a8588c..06149c83e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2291,7 +2291,7 @@ class CustomStreamWrapper:
         # Log the type of the received item
         self.logging_obj.post_call(str(type(completion_stream)))
         if model in litellm.cohere_models:
-            # cohere does not return an iterator, so we need to wrap it in one
+            # these do not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream
@@ -2461,6 +2461,12 @@
             raise StopAsyncIteration
 
 
+def mock_completion_streaming_obj(model_response, mock_response, model):
+    for i in range(0, len(mock_response), 3):
+        completion_obj = {"role": "assistant", "content": mock_response[i: i+3]}
+        model_response.choices[0].delta = completion_obj
+        yield model_response
+
 ########## Reading Config File ############################
 def read_config_args(config_path):
     try:
diff --git a/pyproject.toml b/pyproject.toml
index 17e65d90b..5d259cdc3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.624"
+version = "0.1.625"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
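For reference, a minimal sketch of how the new streaming mock path could be exercised. The `litellm.mock_completion` call, its `stream`/`mock_response` kwargs, and the `choices[0].delta` field all come from the diff above; the consumption loop itself is an assumed usage pattern, not part of this change.

```python
# Sketch only: consuming mock_completion with stream=True as wired up in this diff.
# The iteration pattern below is an assumption; only the names used come from the change.
import litellm

chunks = litellm.mock_completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, I'm a mock request"}],
    stream=True,
    mock_response="This is a mock request",
)

for chunk in chunks:
    # each yielded ModelResponse carries ~3 characters of mock_response in choices[0].delta
    print(chunk.choices[0].delta)
```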