From e07bf0a8de21b06deb1b0435cbc70b7f64107a0a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 14 Nov 2023 22:07:28 -0800
Subject: [PATCH] fix(utils.py): await async function in client wrapper

---
 litellm/tests/test_async_fn.py | 3 ++-
 litellm/utils.py               | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index 7d0d3c9a8..7e435d4d0 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -42,6 +42,7 @@ def test_get_response_streaming():
         user_message = "write a short poem in one sentence"
         messages = [{"content": user_message, "role": "user"}]
         try:
+            litellm.set_verbose = True
             response = await acompletion(model="command-nightly", messages=messages, stream=True)
             print(type(response))
@@ -65,7 +66,7 @@ def test_get_response_streaming():
 
     asyncio.run(test_async_call())
 
-# test_get_response_streaming()
+test_get_response_streaming()
 
 def test_get_response_non_openai_streaming():
     import asyncio
diff --git a/litellm/utils.py b/litellm/utils.py
index 24c53d87f..4b75cc8c1 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1238,7 +1238,7 @@ def client(original_function):
             else:
                 return cached_result
             # MODEL CALL
-            result = original_function(*args, **kwargs)
+            result = await original_function(*args, **kwargs)
             end_time = datetime.datetime.now()
             if "stream" in kwargs and kwargs["stream"] == True:
                 if "complete_response" in kwargs and kwargs["complete_response"] == True:
@@ -1248,7 +1248,6 @@ def client(original_function):
                     return litellm.stream_chunk_builder(chunks)
                 else:
                     return result
-            result = await result
            # [OPTIONAL] ADD TO CACHE
            if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                litellm.cache.add_cache(result, *args, **kwargs)
@@ -4459,7 +4458,6 @@ class CustomStreamWrapper:
             traceback.print_exc()
             raise e
 
-
     def handle_openai_text_completion_chunk(self, chunk):
         try:
             str_line = chunk
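
Note on the change: the patch moves the await to the model-call site (`result = await original_function(*args, **kwargs)`), so the previous `result = await result` line, which sat after the streaming early-return and could be skipped, is no longer needed. Below is a minimal, self-contained sketch of that pattern, not the actual litellm implementation; `fake_cache` and the toy `acompletion` are hypothetical stand-ins for litellm.cache and the real completion call.

```python
import asyncio
import functools

fake_cache = {}  # hypothetical stand-in for litellm.cache


def client(original_function):
    @functools.wraps(original_function)
    async def wrapper(*args, **kwargs):
        key = str(args) + str(sorted(kwargs.items()))
        # CHECK CACHE
        if key in fake_cache:
            return fake_cache[key]
        # MODEL CALL: await here, so `result` is a value, not a coroutine,
        # by the time it reaches the streaming check and the cache step.
        result = await original_function(*args, **kwargs)
        if kwargs.get("stream") is True:
            # streaming responses are returned before caching
            return result
        # [OPTIONAL] ADD TO CACHE
        fake_cache[key] = result
        return result

    return wrapper


@client
async def acompletion(model, messages, stream=False):
    # toy completion used only to exercise the wrapper
    return {"model": model, "choices": [{"message": {"content": "hi"}}]}


if __name__ == "__main__":
    out = asyncio.run(acompletion("command-nightly", [{"role": "user", "content": "hey"}]))
    print(out)
```

Under this sketch's assumptions, awaiting at the call site is what lets the wrapper return a stream object early while still caching fully-resolved results on the non-streaming path, which matches the intent of the two utils.py hunks above.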