From 3563ae81a8777c1d52fff75c928bfb96fe3cba0e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 26 Oct 2023 12:10:37 -0700
Subject: [PATCH] (docs) improve async + streaming completion

---
 docs/my-website/docs/completion/stream.md | 32 +++++++----------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/docs/my-website/docs/completion/stream.md b/docs/my-website/docs/completion/stream.md
index a40b462cc..413076dc9 100644
--- a/docs/my-website/docs/completion/stream.md
+++ b/docs/my-website/docs/completion/stream.md
@@ -2,11 +2,13 @@
 
 - [Streaming Responses](#streaming-responses)
 - [Async Completion](#async-completion)
+- [Async + Streaming Completion](#async-streaming)
 
 ## Streaming Responses
 LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
 ### Usage
 ```python
+from litellm import completion
 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 ### Usage
 Here's an example of using it with openai.
 ```python
-from litellm import completion
-import asyncio, os, traceback, time
-
-os.environ["OPENAI_API_KEY"] = "your-api-key"
-
-def logger_fn(model_call_object: dict):
-    print(f"LOGGER FUNCTION: {model_call_object}")
-
-
-user_message = "Hello, how are you?"
-messages = [{"content": user_message, "role": "user"}]
+from litellm import acompletion
+import asyncio, os, traceback
 
 async def completion_call():
     try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        print("test acompletion + streaming")
+        response = await acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            stream=True
         )
         print(f"response: {response}")
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"].get("content", "")
-        if complete_response == "":
-            raise Exception("Empty response received")
+            print(chunk)
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
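
Note: the simplified example this patch introduces prints each raw chunk. If a reader also wants the assembled response text (which the removed example built up in `complete_response`), here is a minimal sketch, not part of the patch. It assumes chunks expose the OpenAI-style `chunk["choices"][0]["delta"]` dict with an optional `content` key, as the removed code did; the function name `stream_and_collect` is hypothetical.

```python
# Sketch (assumption: chunks follow the OpenAI delta format used in the
# removed example, i.e. chunk["choices"][0]["delta"] with an optional
# "content" key).
from litellm import acompletion
import asyncio, os

os.environ["OPENAI_API_KEY"] = "your-api-key"

async def stream_and_collect() -> str:
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        stream=True,
    )
    complete_response = ""
    async for chunk in response:
        delta = chunk["choices"][0]["delta"]
        print(delta)  # stream each delta as it arrives
        # delta may omit "content" (e.g. role-only or final chunks)
        complete_response += delta.get("content", "") or ""
    return complete_response

print(asyncio.run(stream_and_collect()))
```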