(docs) improve async + streaming completion

This commit is contained in:
ishaan-jaff 2023-10-26 12:10:37 -07:00
parent 8d2a4a597a
commit 3563ae81a8

@@ -2,11 +2,13 @@
- [Streaming Responses](#streaming-responses)
- [Async Completion](#async-completion)
- [Async + Streaming Completion](#async-streaming)
## Streaming Responses
LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
### Usage
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
    print(chunk['choices'][0]['delta'])
```
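If you also want the full reply and not just the incremental chunks, you can accumulate the streamed deltas as you iterate (not every chunk's `delta` carries `content`, hence the `.get`). A minimal sketch, reusing the same `messages` as above:

```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

complete_response = ""
for chunk in response:
    # each chunk carries an incremental delta; missing "content" defaults to ""
    complete_response += chunk['choices'][0]['delta'].get("content", "")
print(complete_response)
```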
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
### Usage
Here's an example of using it with openai.
```python
from litellm import acompletion
import asyncio, os, traceback

os.environ["OPENAI_API_KEY"] = "your-api-key"

async def completion_call():
    try:
        print("test acompletion + streaming")
        response = await acompletion(
            model="gpt-3.5-turbo",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True
        )
        print(f"response: {response}")
        # consume the stream with an async for loop
        async for chunk in response:
            print(chunk)
    except:
        print(f"error occurred: {traceback.format_exc()}")
        pass

asyncio.run(completion_call())
```
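Because `acompletion` returns an awaitable, you can also fan out several streaming requests concurrently with `asyncio.gather`. A rough sketch under the same model and message shape as above (the helper name `stream_one` is just for illustration):

```python
from litellm import acompletion
import asyncio

async def stream_one(prompt: str) -> str:
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": prompt, "role": "user"}],
        stream=True,
    )
    text = ""
    async for chunk in response:
        # accumulate the incremental deltas into the full reply
        delta = chunk["choices"][0]["delta"]
        text += delta.get("content", "") or ""
    return text

async def main():
    # run two streaming completions concurrently
    replies = await asyncio.gather(
        stream_one("Hello, how are you?"),
        stream_one("Tell me a joke."),
    )
    print(replies)

asyncio.run(main())
```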