(docs) improve async + streaming completion

This commit is contained in:
ishaan-jaff 2023-10-26 12:10:37 -07:00
parent 8d2a4a597a
commit 3563ae81a8

@@ -2,11 +2,13 @@
- [Streaming Responses](#streaming-responses)
- [Async Completion](#async-completion)
- [Async + Streaming Completion](#async-streaming)
## Streaming Responses
LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
### Usage
```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
    print(chunk['choices'][0]['delta'])
```
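If you also want the full reply and not just the incremental chunks, you can accumulate the streamed deltas as you iterate (not every chunk's `delta` carries `content`, hence the `.get`). A minimal sketch, reusing the same `messages` as above:

```python
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

complete_response = ""
for chunk in response:
    # each chunk carries an incremental delta; missing "content" defaults to ""
    complete_response += chunk['choices'][0]['delta'].get("content", "")
print(complete_response)
```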
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
### Usage
Here's an example of using it with openai.
```python
from litellm import acompletion
import asyncio, os, traceback

os.environ["OPENAI_API_KEY"] = "your-api-key"

async def completion_call():
    try:
        print("test acompletion + streaming")
        response = await acompletion(
            model="gpt-3.5-turbo",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True
        )
        print(f"response: {response}")
        # consume the stream with an async for loop
        async for chunk in response:
            print(chunk)
    except:
        print(f"error occurred: {traceback.format_exc()}")
        pass

asyncio.run(completion_call())
```
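Because `acompletion` returns an awaitable, you can also fan out several streaming requests concurrently with `asyncio.gather`. A rough sketch under the same model and message shape as above (the helper name `stream_one` is just for illustration):

```python
from litellm import acompletion
import asyncio

async def stream_one(prompt: str) -> str:
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": prompt, "role": "user"}],
        stream=True,
    )
    text = ""
    async for chunk in response:
        # accumulate the incremental deltas into the full reply
        delta = chunk["choices"][0]["delta"]
        text += delta.get("content", "") or ""
    return text

async def main():
    # run two streaming completions concurrently
    replies = await asyncio.gather(
        stream_one("Hello, how are you?"),
        stream_one("Tell me a joke."),
    )
    print(replies)

asyncio.run(main())
```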