diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 7048c99f8..58ce173b3 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ef3e1e402..773d47432 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index f0238a6e7..6a48c7f57 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -98,6 +98,7 @@ def completion(
     if fallbacks != []:
         return completion_with_fallbacks(**args)
     if litellm.model_alias_map and model in litellm.model_alias_map:
+        args["model_alias_map"] = litellm.model_alias_map
         model = litellm.model_alias_map[model] # update the model to the actual value if an alias has been passed in
     model_response = ModelResponse()
     if azure: # this flag is deprecated, remove once notebooks are also updated.
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index b6deb2098..b6e37a7e8 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -62,22 +62,22 @@ messages = [{"content": user_message, "role": "user"}]
 
 
 # test on anthropic completion call
-# try:
-#     response = completion(
-#         model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
-#     )
-#     complete_response = ""
-#     start_time = time.time()
-#     for chunk in response:
-#         chunk_time = time.time()
-#         print(f"time since initial request: {chunk_time - start_time:.5f}")
-#         print(chunk["choices"][0]["delta"])
-#         complete_response += chunk["choices"][0]["delta"]["content"]
-#     if complete_response == "":
-#         raise Exception("Empty response received")
-# except:
-#     print(f"error occurred: {traceback.format_exc()}")
-#     pass
+try:
+    response = completion(
+        model="claude-instant-1", messages=messages, stream=True, logger_fn=logger_fn
+    )
+    complete_response = ""
+    start_time = time.time()
+    for chunk in response:
+        chunk_time = time.time()
+        print(f"time since initial request: {chunk_time - start_time:.5f}")
+        print(chunk["choices"][0]["delta"])
+        complete_response += chunk["choices"][0]["delta"]["content"]
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
 
 
 # # test on huggingface completion call
@@ -98,7 +98,7 @@ messages = [{"content": user_message, "role": "user"}]
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
 
-# test on together ai completion call
+# test on together ai completion call - replit-code-3b
 try:
     start_time = time.time()
     response = completion(
@@ -117,6 +117,25 @@ except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
 
+# test on together ai completion call - starcoder
+try:
+    start_time = time.time()
+    response = completion(
+        model="together_ai/bigcode/starcoder", messages=messages, logger_fn=logger_fn, stream= True
+    )
+    complete_response = ""
+    print(f"returned response object: {response}")
+    for chunk in response:
+        chunk_time = time.time()
+        complete_response += chunk["choices"][0]["delta"]["content"] if len(chunk["choices"][0]["delta"].keys()) > 0 else ""
+        if len(complete_response) > 0:
+            print(complete_response)
+    if complete_response == "":
+        raise Exception("Empty response received")
+except:
+    print(f"error occurred: {traceback.format_exc()}")
+    pass
+
 
 # # test on azure completion call
 # try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 77ce504cf..719ec6514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1468,8 +1468,6 @@ class CustomStreamWrapper:
         if model in litellm.cohere_models:
             # cohere does not return an iterator, so we need to wrap it in one
             self.completion_stream = iter(completion_stream)
-        elif custom_llm_provider == "together_ai":
-            self.completion_stream = iter(completion_stream)
         else:
             self.completion_stream = completion_stream
 
@@ -1512,7 +1510,7 @@ class CustomStreamWrapper:
             elif self.model == "replicate":
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = chunk
-            elif (self.model == "together_ai") or ("togethercomputer"
+            elif (self.custom_llm_provider and self.custom_llm_provider == "together_ai") or ("togethercomputer"
                   in self.model):
                 chunk = next(self.completion_stream)
                 text_data = self.handle_together_ai_chunk(chunk)
diff --git a/pyproject.toml b/pyproject.toml
index 20675dce8..f51666c0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.490"
+version = "0.1.491"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
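
Note on the litellm/main.py change above: completion() resolves aliases through litellm.model_alias_map, and this patch additionally records the map in the call args (args["model_alias_map"]) before the alias is replaced with the real model name. A minimal usage sketch, assuming the public completion() API exercised in the tests; the alias name and prompt below are illustrative, not taken from the patch:

    import litellm
    from litellm import completion

    # illustrative alias -> real model mapping
    litellm.model_alias_map = {"starcoder": "together_ai/bigcode/starcoder"}

    messages = [{"content": "write a hello world program", "role": "user"}]

    # "starcoder" is swapped for the mapped model before the request is made;
    # with this patch the alias map itself also travels along in args["model_alias_map"]
    response = completion(model="starcoder", messages=messages)
    print(response["choices"][0]["message"]["content"])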
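
Note on the litellm/utils.py change above: CustomStreamWrapper previously treated a response as a Together AI stream only when the model string was literally "together_ai" or contained "togethercomputer"; it now also keys off custom_llm_provider == "together_ai", which covers models such as bigcode/starcoder and matches the new starcoder test. A minimal consumption sketch of such a stream (prompt is illustrative):

    from litellm import completion

    messages = [{"content": "def fib(n):", "role": "user"}]

    # stream a Together AI hosted model and accumulate the generated text;
    # chunks whose delta carries no "content" key contribute nothing
    response = completion(
        model="together_ai/bigcode/starcoder", messages=messages, stream=True
    )
    text = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        text += delta.get("content", "")
    print(text)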