(docs) improve async + streaming completion

parent 8d2a4a597a
commit 3563ae81a8
1 changed file with 10 additions and 22 deletions
@@ -2,11 +2,13 @@
 - [Streaming Responses](#streaming-responses)
 - [Async Completion](#async-completion)
+- [Async + Streaming Completion](#async-streaming)

 ## Streaming Responses
 LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function.

 ### Usage
 ```python
 from litellm import completion

 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
 ```
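A note on the streaming output above: each chunk's `delta` carries only an increment of the reply, so a caller that wants the full text concatenates the `content` fields, as the example deleted in the next hunk did. A minimal sketch along those lines; the key and model values are placeholders:

```python
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "your-api-key"  # placeholder key

messages = [{"content": "Hello, how are you?", "role": "user"}]

# stream=True makes completion() return an iterator of chunks.
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

full_reply = ""
for chunk in response:
    # `content` can be missing on boundary chunks, so default to "".
    full_reply += chunk["choices"][0]["delta"].get("content", "")
print(full_reply)
```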
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 ### Usage
 Here's an example of using it with OpenAI.
 ```python
-from litellm import completion
-import asyncio, os, traceback, time
-
-os.environ["OPENAI_API_KEY"] = "your-api-key"
-
-def logger_fn(model_call_object: dict):
-    print(f"LOGGER FUNCTION: {model_call_object}")
-
-
-user_message = "Hello, how are you?"
-messages = [{"content": user_message, "role": "user"}]
+from litellm import acompletion
+import asyncio, os, traceback

 async def completion_call():
     try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        print("test acompletion + streaming")
+        response = await acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            stream=True
         )
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
+        print(f"response: {response}")
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"].get("content", "")
-        if complete_response == "":
-            raise Exception("Empty response received")
+            print(chunk)
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
 ```
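For context on the `__anext__()` mention in the hunk header: implementing `__anext__()` is what makes the streaming object a valid async iterator, which is why `async for` works on the awaited `acompletion` result. A minimal sketch of running the new example end to end, assuming a placeholder key; `asyncio.run` is from the standard library:

```python
import asyncio, os
from litellm import acompletion

os.environ["OPENAI_API_KEY"] = "your-api-key"  # placeholder key

async def main():
    # With stream=True, the awaited result is an async iterable whose
    # __anext__() yields chunks as the model produces them.
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": "Hello, how are you?", "role": "user"}],
        stream=True,
    )
    async for chunk in response:
        print(chunk["choices"][0]["delta"])

# Coroutines need an event loop; asyncio.run provides one.
asyncio.run(main())
```

Unlike the synchronous example, waiting on the next chunk does not block the process: each `await` yields control, so the event loop can schedule other tasks between iterations.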