diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index fdc6de8fa..ea951f087 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -245,11 +245,11 @@ class OpenAIChatCompletion(BaseLLM):
                           api_base: str,
                           data: dict, headers: dict,
                           model_response: ModelResponse):
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=600) as client:
             response = await client.post(api_base, json=data, headers=headers)
             response_json = response.json()
 
-            if response.status != 200:
-                raise OpenAIError(status_code=response.status, message=response.text)
+            if response.status_code != 200:
+                raise OpenAIError(status_code=response.status_code, message=response.text)
 
             ## RESPONSE OBJECT
diff --git a/litellm/main.py b/litellm/main.py
index 155012a35..e5f79b480 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -524,10 +524,7 @@ def completion(
                     additional_args={"headers": headers},
                 )
                 raise e
-
-            if optional_params.get("stream", False) and acompletion is False:
-                response = CustomStreamWrapper(response, model, custom_llm_provider=custom_llm_provider, logging_obj=logging)
-                return response
+
             ## LOGGING
             logging.post_call(
                 input=messages,
diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index ba1a375c8..01f559b1f 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -28,14 +28,13 @@ def test_async_response():
         user_message = "Hello, how are you?"
         messages = [{"content": user_message, "role": "user"}]
         try:
-            response = await acompletion(model="gpt-3.5-turbo-instruct", messages=messages)
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages)
             print(f"response: {response}")
         except Exception as e:
             pytest.fail(f"An exception occurred: {e}")
 
-    response = asyncio.run(test_get_response())
-    print(response)
-# test_async_response()
+    asyncio.run(test_get_response())
+test_async_response()
 
 def test_get_response_streaming():
     import asyncio
@@ -43,7 +42,7 @@ def test_get_response_streaming():
         user_message = "write a short poem in one sentence"
         messages = [{"content": user_message, "role": "user"}]
         try:
-            response = await acompletion(model="gpt-3.5-turbo-instruct", messages=messages, stream=True)
+            response = await acompletion(model="gpt-3.5-turbo", messages=messages, stream=True)
             print(type(response))
 
             import inspect
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 5a5c22898..9df584b83 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -67,7 +67,7 @@ def test_context_window_with_fallbacks(model):
 # for model in litellm.models_by_provider["bedrock"]:
 #     test_context_window(model=model)
 
-# test_context_window(model="gpt-3.5-turbo")
+test_context_window(model="azure/chatgpt-v-2")
 # test_context_window_with_fallbacks(model="gpt-3.5-turbo")
 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 316309e04..4870b5ed8 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -131,7 +131,6 @@ def streaming_format_tests(idx, chunk):
     if chunk["choices"][0]["finish_reason"]: # ensure finish reason is only in last chunk
         validate_last_format(chunk=chunk)
         finished = True
-        print(f"chunk choices: {chunk['choices'][0]['delta']['content']}")
     if "content" in chunk["choices"][0]["delta"]:
         extracted_chunk = chunk["choices"][0]["delta"]["content"]
         print(f"extracted chunk: {extracted_chunk}")
@@ -837,6 +836,7 @@ def test_openai_chat_completion_call():
         start_time = time.time()
         for idx, chunk in enumerate(response):
             chunk, finished = streaming_format_tests(idx, chunk)
+            print(f"outside chunk: {chunk}")
             if finished:
                 break
             complete_response += chunk
diff --git a/litellm/utils.py b/litellm/utils.py
index c7a7fb292..8906c6a5a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4549,7 +4549,7 @@ class CustomStreamWrapper:
         except StopIteration:
             raise StopIteration
         except Exception as e:
-            traceback_exception = traceback.print_exc()
+            traceback_exception = traceback.format_exc()
             e.message = str(e)
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
@@ -4557,18 +4557,25 @@ class CustomStreamWrapper:
 
     ## needs to handle the empty string case (even starting chunk can be an empty string)
     def __next__(self):
-        while True: # loop until a non-empty string is found
-            try:
-                # if isinstance(self.completion_stream, str):
-                #     chunk = self.completion_stream
-                # else:
-                chunk = next(self.completion_stream)
-                response = self.chunk_creator(chunk=chunk)
-                # if response is not None:
-                return response
-            except Exception as e:
-                raise StopIteration
-
+        try:
+            while True:
+                if isinstance(self.completion_stream, str):
+                    chunk = self.completion_stream
+                else:
+                    chunk = next(self.completion_stream)
+
+                if chunk is not None:
+                    response = self.chunk_creator(chunk=chunk)
+                    if response is not None:
+                        return response
+        except StopIteration:
+            raise  # Re-raise StopIteration
+        except Exception as e:
+            # Handle other exceptions if needed
+            pass
+
+
+
     async def __anext__(self):
         try:
             if (self.custom_llm_provider == "openai"