diff --git a/litellm/main.py b/litellm/main.py
index 278399d5f..1ddefc756 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2472,22 +2472,22 @@ async def atext_completion(*args, **kwargs):
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "vertex_ai"
         ):  # currently implemented aiohttp calls for just azure and openai, soon all.
-            if kwargs.get("stream", False):
-                response = text_completion(*args, **kwargs)
-            else:
-                # Await normally
-                response = await loop.run_in_executor(None, func_with_context)
-                if asyncio.iscoroutine(response):
-                    response = await response
+            # Await normally
+            response = await loop.run_in_executor(None, func_with_context)
+            if asyncio.iscoroutine(response):
+                response = await response
         else:
             # Call the synchronous function using run_in_executor
             response = await loop.run_in_executor(None, func_with_context)
-        if kwargs.get("stream", False):  # return an async generator
-            return _async_streaming(
-                response=response,
+        if kwargs.get("stream", False) == True:  # return an async generator
+            return TextCompletionStreamWrapper(
+                completion_stream=_async_streaming(
+                    response=response,
+                    model=model,
+                    custom_llm_provider=custom_llm_provider,
+                    args=args,
+                ),
                 model=model,
-                custom_llm_provider=custom_llm_provider,
-                args=args,
             )
         else:
             return response
@@ -2691,11 +2691,11 @@ def text_completion(
         **kwargs,
         **optional_params,
     )
+    if kwargs.get("acompletion", False) == True:
+        return response
     if stream == True or kwargs.get("stream", False) == True:
         response = TextCompletionStreamWrapper(completion_stream=response, model=model)
         return response
-    if kwargs.get("acompletion", False) == True:
-        return response
     transformed_logprobs = None
     # only supported for TGI models
     try:
diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 485e86e7f..547abb533 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -215,3 +215,30 @@ def test_get_response_non_openai_streaming():
 
 
 # test_get_response_non_openai_streaming()
+
+
+async def test_get_response():
+    try:
+        response = await litellm.atext_completion(
+            model="gpt-3.5-turbo",
+            prompt="good morning",
+            stream=True,
+            max_tokens=10,
+        )
+        print(f"response: {response}")
+
+        num_finish_reason = 0
+        async for chunk in response:
+            print(chunk)
+            if chunk["choices"][0].get("finish_reason") is not None:
+                num_finish_reason += 1
+                print("finish_reason", chunk["choices"][0].get("finish_reason"))
+
+        assert (
+            num_finish_reason == 1
+        ), f"expected only one finish reason. Got {num_finish_reason}"
+    except Exception as e:
+        pytest.fail(f"GOT exception for gpt-3.5 instruct In streaming {e}")
+
+
+# asyncio.run(test_get_response())