From 3563ae81a8777c1d52fff75c928bfb96fe3cba0e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 26 Oct 2023 12:10:37 -0700
Subject: [PATCH] (docs) improve async + streaming completion

---
 docs/my-website/docs/completion/stream.md | 32 +++++++----------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/docs/my-website/docs/completion/stream.md b/docs/my-website/docs/completion/stream.md
index a40b462cc..413076dc9 100644
--- a/docs/my-website/docs/completion/stream.md
+++ b/docs/my-website/docs/completion/stream.md
@@ -2,11 +2,13 @@
 
 - [Streaming Responses](#streaming-responses)
 - [Async Completion](#async-completion)
+- [Async + Streaming Completion](#async-streaming)
 
 ## Streaming Responses
 LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
 ### Usage
 ```python
+from litellm import completion
 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 ### Usage
 Here's an example of using it with openai.
 ```python
-from litellm import completion
-import asyncio, os, traceback, time
-
-os.environ["OPENAI_API_KEY"] = "your-api-key"
-
-def logger_fn(model_call_object: dict):
-    print(f"LOGGER FUNCTION: {model_call_object}")
-
-
-user_message = "Hello, how are you?"
-messages = [{"content": user_message, "role": "user"}]
+from litellm import acompletion
+import asyncio, os, traceback
 
 async def completion_call():
     try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        print("test acompletion + streaming")
+        response = await acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            stream=True
         )
         print(f"response: {response}")
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"].get("content", "")
-        if complete_response == "":
-            raise Exception("Empty response received")
+            print(chunk)
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
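
Note: the simplified example this patch introduces prints each raw chunk. If a reader also wants the assembled response text (which the removed example built up in `complete_response`), here is a minimal sketch, not part of the patch. It assumes chunks expose the OpenAI-style `chunk["choices"][0]["delta"]` dict with an optional `content` key, as the removed code did; the function name `stream_and_collect` is hypothetical.

```python
# Sketch (assumption: chunks follow the OpenAI delta format used in the
# removed example, i.e. chunk["choices"][0]["delta"] with an optional
# "content" key).
from litellm import acompletion
import asyncio, os

os.environ["OPENAI_API_KEY"] = "your-api-key"

async def stream_and_collect() -> str:
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        stream=True,
    )
    complete_response = ""
    async for chunk in response:
        delta = chunk["choices"][0]["delta"]
        print(delta)  # stream each delta as it arrives
        # delta may omit "content" (e.g. role-only or final chunks)
        complete_response += delta.get("content", "") or ""
    return complete_response

print(asyncio.run(stream_and_collect()))
```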