forked from phoenix/litellm-mirror
(docs) improve async + streaming completion
parent 8d2a4a597a
commit 3563ae81a8
1 changed file with 10 additions and 22 deletions

@@ -2,11 +2,13 @@
 - [Streaming Responses](#streaming-responses)
 - [Async Completion](#async-completion)
+- [Async + Streaming Completion](#async-streaming)
 
 ## Streaming Responses
 LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
 
 ### Usage
 ```python
+from litellm import completion
 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 ### Usage
 Here's an example of using it with openai.
 ```python
-from litellm import completion
-import asyncio, os, traceback, time
+from litellm import acompletion
+import asyncio, os, traceback
 
-os.environ["OPENAI_API_KEY"] = "your-api-key"
-
-def logger_fn(model_call_object: dict):
-    print(f"LOGGER FUNCTION: {model_call_object}")
-
-
-user_message = "Hello, how are you?"
-messages = [{"content": user_message, "role": "user"}]
-
 async def completion_call():
     try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        print("test acompletion + streaming")
+        response = await acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            stream=True
         )
         print(f"response: {response}")
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"].get("content", "")
-        if complete_response == "":
-            raise Exception("Empty response received")
+            print(chunk)
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
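
The new example defines `completion_call()` but the hunk ends before any call site, and it drops the old example's accumulation of the streamed deltas into a single string. A minimal sketch of how one might run it end to end is below; the `asyncio.run` entry point, the accumulation loop, and the assumption that `OPENAI_API_KEY` is already set in the environment are illustrative additions, not part of this commit.

```python
# Illustrative sketch only, not part of the commit shown above.
# Runs the async + streaming example and rebuilds the full reply from the
# incremental deltas, mirroring the pattern the removed code used.
import asyncio, traceback
from litellm import acompletion

async def completion_call():
    try:
        response = await acompletion(
            model="gpt-3.5-turbo",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True
        )
        complete_response = ""
        async for chunk in response:
            delta = chunk["choices"][0]["delta"]
            print(delta)
            # the final chunk(s) may carry no "content", so default to ""
            complete_response += delta.get("content", "") or ""
        print(f"full response: {complete_response}")
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")

# assumes OPENAI_API_KEY is set in the environment before running
asyncio.run(completion_call())
```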