diff --git a/litellm/utils.py b/litellm/utils.py
index a9cc6e465a..c9aca2d567 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -18,7 +18,7 @@ import tiktoken
 import uuid
 import aiohttp
 import logging
-import asyncio
+import asyncio, httpx
 import copy
 from tokenizers import Tokenizer
 from dataclasses import (
@@ -4089,14 +4089,22 @@ def exception_type(
                     llm_provider=custom_llm_provider,
                     response=original_exception.response
                 )
-        else:
+        else:  # ensure generic errors always raise APIConnectionError
             exception_mapping_worked = True
-            raise APIConnectionError(
-                message=f"{str(original_exception)}",
-                llm_provider=custom_llm_provider,
-                model=model,
-                request=original_exception.request
-            )
+            if hasattr(original_exception, "request"):
+                raise APIConnectionError(
+                    message=f"{str(original_exception)}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                    request=original_exception.request
+                )
+            else:
+                raise APIConnectionError(
+                    message=f"{str(original_exception)}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                    request=httpx.Request(method="POST", url="https://api.openai.com/v1/")  # stub the request
+                )
     except Exception as e:
         # LOGGING
         exception_logging(
@@ -4400,10 +4408,11 @@ class CustomStreamWrapper:
         elif chunk.startswith("data:"):
             data_json = json.loads(chunk[5:])  # chunk.startswith("data:"):
             try:
-                text = data_json["choices"][0]["delta"].get("content", "")
-                if data_json["choices"][0].get("finish_reason", None):
-                    is_finished = True
-                    finish_reason = data_json["choices"][0]["finish_reason"]
+                if len(data_json["choices"]) > 0:
+                    text = data_json["choices"][0]["delta"].get("content", "")
+                    if data_json["choices"][0].get("finish_reason", None):
+                        is_finished = True
+                        finish_reason = data_json["choices"][0]["finish_reason"]
                 print_verbose(f"text: {text}; is_finished: {is_finished}; finish_reason: {finish_reason}")
                 return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
             except:
@@ -4725,7 +4734,7 @@ class CustomStreamWrapper:
            e.message = str(e)
            # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
            threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
-           return exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e)
+           raise exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=e)
 
     ## needs to handle the empty string case (even starting chunk can be an empty string)
     def __next__(self):
@@ -4746,7 +4755,7 @@ def __next__(self):
             raise  # Re-raise StopIteration
         except Exception as e:
             # Handle other exceptions if needed
-            pass
+            raise e
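
Reviewer note on the exception_type hunk: litellm's APIConnectionError (mirroring the openai v1 exception classes) must be constructed with an httpx.Request, but a generic exception that never reached the wire, say a plain ValueError raised client-side, carries no .request attribute, so the old else branch died with an AttributeError instead of the APIConnectionError it promised. A minimal sketch of the guard in isolation, using a stand-in exception class rather than litellm's real one:

    import httpx

    class APIConnectionError(Exception):
        """Stand-in for litellm's APIConnectionError, which requires a request."""
        def __init__(self, message, llm_provider, model, request):
            super().__init__(message)
            self.message = message
            self.llm_provider = llm_provider
            self.model = model
            self.request = request

    def map_generic(original_exception, llm_provider, model):
        # Reuse the request carried by the original exception when it has one;
        # otherwise build a stub so APIConnectionError is still constructible.
        # The stub URL mirrors the one hard-coded in the diff.
        if hasattr(original_exception, "request"):
            request = original_exception.request
        else:
            request = httpx.Request(method="POST", url="https://api.openai.com/v1/")
        raise APIConnectionError(
            message=str(original_exception),
            llm_provider=llm_provider,
            model=model,
            request=request,
        )

An equivalent one-liner would be getattr(original_exception, "request", <stub>), though the explicit if/else in the diff reads closer to the surrounding code.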
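Reviewer note on the @@ -4400 hunk: some OpenAI-compatible streams (Azure's content-filter preamble chunk, for example) emit data: frames whose choices list is empty, so indexing choices[0] unconditionally raised IndexError, which the bare except around the parse then caught. A rough standalone sketch of the guarded parse, assuming the same chunk shape the wrapper handles:

    import json

    def parse_data_chunk(chunk: str) -> dict:
        # Defaults survive when "choices" is empty, mirroring the new guard.
        text, is_finished, finish_reason = "", False, None
        if chunk.startswith("data:"):
            data_json = json.loads(chunk[5:])
            if len(data_json["choices"]) > 0:
                text = data_json["choices"][0]["delta"].get("content", "")
                if data_json["choices"][0].get("finish_reason", None):
                    is_finished = True
                    finish_reason = data_json["choices"][0]["finish_reason"]
        return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

    # An empty-choices chunk now falls through to the defaults:
    parse_data_chunk('data: {"choices": []}')
    # -> {'text': '', 'is_finished': False, 'finish_reason': None}
    parse_data_chunk('data: {"choices": [{"delta": {"content": "hi"}, "finish_reason": "stop"}]}')
    # -> {'text': 'hi', 'is_finished': True, 'finish_reason': 'stop'}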
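Reviewer note on the last two hunks: exception_type is written to raise the mapped exception itself, so return exception_type(...) only worked by accident; on any path where the mapper returned instead of raising, the error would be handed back to the caller as if it were data. Raising the call's result, together with replacing the trailing pass with raise e in __next__, means a failure can no longer be swallowed into a silently dead stream. A toy illustration of the return-vs-raise difference, with a hypothetical map_error standing in for exception_type:

    def map_error(e: Exception) -> Exception:
        # Like exception_type: meant to raise a normalized error.
        raise RuntimeError(f"mapped: {e}")

    def old_style(e: Exception):
        # Propagates only because map_error happens to raise; a fall-through
        # return would be yielded to the caller as if it were a chunk.
        return map_error(e)

    def new_style(e: Exception):
        # Propagates either way: if map_error raises, that wins; if it ever
        # returned, the bare `raise` would still fail loudly on a non-exception
        # value rather than hand it back.
        raise map_error(e)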