diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 593e38f0d..5e9be1663 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 4f22f8da5..61ed4069b 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 1301b7407..10d721478 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -214,6 +214,31 @@ def test_completion_cohere_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+def test_completion_claude_stream():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(
+            model="claude-instant-1", messages=messages, stream=True, max_tokens=50
+        )
+        complete_response = ""
+        # Add any assertions here to check the response
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+# test_completion_claude_stream()
 
 def test_completion_bedrock_ai21_stream():
     try:
@@ -327,28 +352,6 @@ def test_together_ai_completion_call_starcoder():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        complete_response = ""
-        # Add any assertions here to check the response
-        for idx, chunk in enumerate(response):
-            chunk, finished = streaming_format_tests(idx, chunk)
-            if finished:
-                break
-            complete_response += chunk
-        if complete_response == "":
-            raise Exception("Empty response received")
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
 #### Test Function calling + streaming ####
 
 def test_completion_openai_with_functions():
diff --git a/litellm/utils.py b/litellm/utils.py
index 7e6b961fa..9431faa3b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2506,69 +2506,71 @@ class CustomStreamWrapper:
             return chunk_data['outputText']
         return ""
 
+    ## needs to handle the empty string case (even starting chunk can be an empty string)
     def __next__(self):
         model_response = ModelResponse(stream=True, model=self.model)
         try:
-            # return this for all models
-            completion_obj = {"content": ""}
-            if self.sent_first_chunk == False:
-                completion_obj["role"] = "assistant"
-                self.sent_first_chunk = True
-            if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_anthropic_chunk(chunk)
-            elif self.model == "replicate" or self.custom_llm_provider == "replicate":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk
-            elif (
-                self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
-                chunk = next(self.completion_stream)
-                text_data = self.handle_together_ai_chunk(chunk)
-                if text_data == "":
-                    return self.__next__()
-                completion_obj["content"] = text_data
-            elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_huggingface_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_baseten_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_ai21_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk[0].outputs[0].text
-            elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
-            elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
-            elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
-            elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = str(chunk)
-            elif self.custom_llm_provider == "cohere":
-                chunk = next(self.completion_stream)
-                completion_obj["content"] = self.handle_cohere_chunk(chunk)
-            elif self.custom_llm_provider == "bedrock":
-                completion_obj["content"] = self.handle_bedrock_stream()
-            else: # openai chat/azure models
-                chunk = next(self.completion_stream)
-                model_response = chunk
+            while True: # loop until a non-empty string is found
+                # return this for all models
+                completion_obj = {"content": ""}
+                if self.custom_llm_provider and self.custom_llm_provider == "anthropic":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_anthropic_chunk(chunk)
+                elif self.model == "replicate" or self.custom_llm_provider == "replicate":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = chunk
+                elif (
+                    self.custom_llm_provider and self.custom_llm_provider == "together_ai"):
+                    chunk = next(self.completion_stream)
+                    text_data = self.handle_together_ai_chunk(chunk)
+                    if text_data == "":
+                        return self.__next__()
+                    completion_obj["content"] = text_data
+                elif self.custom_llm_provider and self.custom_llm_provider == "huggingface":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_huggingface_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "baseten": # baseten doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_baseten_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "ai21": #ai21 doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_ai21_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "vllm":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = chunk[0].outputs[0].text
+                elif self.custom_llm_provider and self.custom_llm_provider == "aleph-alpha": #aleph alpha doesn't provide streaming
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_aleph_alpha_chunk(chunk)
+                elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
+                elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_nlp_cloud_chunk(chunk)
+                elif self.model in (litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models):
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = str(chunk)
+                elif self.custom_llm_provider == "cohere":
+                    chunk = next(self.completion_stream)
+                    completion_obj["content"] = self.handle_cohere_chunk(chunk)
+                elif self.custom_llm_provider == "bedrock":
+                    completion_obj["content"] = self.handle_bedrock_stream()
+                else: # openai chat/azure models
+                    chunk = next(self.completion_stream)
+                    model_response = chunk
+                    # LOGGING
+                    threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                    return model_response
+                
                 # LOGGING
                 threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-                return model_response
-            
-            # LOGGING
-            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-            model_response.model = self.model
-            if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
-                model_response.choices[0].delta = Delta(**completion_obj)
-            return model_response
+                model_response.model = self.model
+                if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
+                    if self.sent_first_chunk == False:
+                        completion_obj["role"] = "assistant"
+                        self.sent_first_chunk = True
+                    model_response.choices[0].delta = Delta(**completion_obj)
+                    return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
diff --git a/pyproject.toml b/pyproject.toml
index 843bdea01..fb91205e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.685"
+version = "0.1.686"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
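# ---------------------------------------------------------------------------
# Illustrative caller-side sketch (not part of the diff above): how the streaming
# path exercised by the new test_completion_claude_stream test is consumed.
# Assumptions: an Anthropic API key is configured for litellm, and streamed chunks
# keep litellm's OpenAI-style choices[0]["delta"] shape at this version.
from litellm import completion

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "how does a court case get to the Supreme Court?"},
]

response = completion(
    model="claude-instant-1", messages=messages, stream=True, max_tokens=50
)

complete_response = ""
for chunk in response:
    # The patched CustomStreamWrapper.__next__ only yields chunks with non-empty
    # content and attaches the "assistant" role to the first chunk it sends.
    delta = chunk["choices"][0]["delta"]
    complete_response += delta.get("content", "") or ""

print(complete_response)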