diff --git a/dist/litellm-1.12.6.dev3-py3-none-any.whl b/dist/litellm-1.12.6.dev3-py3-none-any.whl
new file mode 100644
index 000000000..81290067d
Binary files /dev/null and b/dist/litellm-1.12.6.dev3-py3-none-any.whl differ
diff --git a/dist/litellm-1.12.6.dev3.tar.gz b/dist/litellm-1.12.6.dev3.tar.gz
new file mode 100644
index 000000000..2dbe390eb
Binary files /dev/null and b/dist/litellm-1.12.6.dev3.tar.gz differ
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 5c63e00a6..7b9993286 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1084,7 +1084,52 @@ def test_completion_chat_sagemaker():
         assert len(complete_response) > 0
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_chat_sagemaker()
+# test_completion_chat_sagemaker()
+
+
+def test_completion_chat_sagemaker_stream():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = True
+        response = completion(
+            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
+            messages=messages,
+            max_tokens=100,
+            temperature=0.7,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        complete_response = ""
+        for chunk in response:
+            complete_response += chunk.choices[0].delta.content or ""
+        print(f"complete_response: {complete_response}")
+        assert len(complete_response) > 0
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+import asyncio
+@pytest.mark.asyncio
+async def test_acompletion_chat_sagemaker_stream():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = True
+        response = await litellm.acompletion(
+            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
+            messages=messages,
+            max_tokens=100,
+            temperature=0.7,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        complete_response = ""
+        async for chunk in response:
+            complete_response += chunk.choices[0].delta.content or ""
+        print(f"complete_response: {complete_response}")
+        assert len(complete_response) > 0
+    except:
+        pass
+
+asyncio.run(test_acompletion_chat_sagemaker_stream())
 
 def test_completion_chat_sagemaker_mistral():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 8f4f76db8..d25bf310a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5692,6 +5692,7 @@ class CustomStreamWrapper:
                 if chunk is not None and chunk != b'':
                     print_verbose(f"PROCESSED CHUNK PRE CHUNK CREATOR: {chunk}")
                     response = self.chunk_creator(chunk=chunk)
+                    print_verbose(f"PROCESSED CHUNK POST CHUNK CREATOR: {chunk}")
                     if response is None:
                         continue
                     ## LOGGING
@@ -5700,6 +5701,7 @@ class CustomStreamWrapper:
         except StopIteration:
             raise  # Re-raise StopIteration
         except Exception as e:
+            print_verbose(f"HITS AN ERROR: {str(e)}")
             traceback_exception = traceback.format_exc()
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
@@ -5731,8 +5733,8 @@ class CustomStreamWrapper:
                 # example - boto3 bedrock llms
                 print_verbose(f"ENTERS __NEXT__ LOOP")
                 processed_chunk = next(self)
-                asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                 print_verbose(f"PROCESSED CHUNK IN __ANEXT__: {processed_chunk}")
+                asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                 return processed_chunk
             except StopAsyncIteration:
                 raise
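
The litellm/utils.py hunks add verbose logging around chunk_creator and, in __anext__, schedule the async success-handler task only after the processed chunk has been logged. Below is a minimal sketch of that pattern: an async iterator that delegates to a synchronous __next__ (the path used for providers that only expose sync streams, e.g. boto3 bedrock or sagemaker) and fires a background logging task per chunk. The names here (SyncBackedAsyncStream, log_chunk) are illustrative only, not litellm's actual API.

import asyncio

# Illustrative sketch, not litellm's CustomStreamWrapper: an async iterator that
# reuses a synchronous __next__ and schedules a fire-and-forget logging task
# after each chunk is produced, mirroring the ordering in the hunk above
# (log the processed chunk, then create the success-handler task, then return it).
class SyncBackedAsyncStream:
    def __init__(self, sync_iterable, on_chunk):
        self._it = iter(sync_iterable)
        self._on_chunk = on_chunk  # async callback standing in for a success handler

    def __aiter__(self):
        return self

    def __next__(self):
        return next(self._it)  # sync path, e.g. a boto3 event stream

    async def __anext__(self):
        try:
            processed_chunk = next(self)  # delegate to the sync iterator
        except StopIteration:
            raise StopAsyncIteration
        print(f"processed chunk: {processed_chunk}")
        # schedule logging only after the chunk exists, without blocking the stream
        asyncio.create_task(self._on_chunk(processed_chunk))
        return processed_chunk


async def log_chunk(chunk):
    # stand-in for an async success handler
    print(f"logged: {chunk}")


async def main():
    async for chunk in SyncBackedAsyncStream(["Hey", ",", " there"], log_chunk):
        print(f"got: {chunk}")
    await asyncio.sleep(0)  # let pending logging tasks finish before the loop closes


if __name__ == "__main__":
    asyncio.run(main())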