forked from phoenix/litellm-mirror
(docs) improve async + streaming completion
parent 8d2a4a597a
commit 3563ae81a8
1 changed file with 10 additions and 22 deletions

@@ -2,11 +2,13 @@
 - [Streaming Responses](#streaming-responses)
 - [Async Completion](#async-completion)
+- [Async + Streaming Completion](#async-streaming)
 
 ## Streaming Responses
 LiteLLM supports streaming the model response back by passing `stream=True` as an argument to the completion function
 
 ### Usage
 ```python
+from litellm import completion
 response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
 for chunk in response:
     print(chunk['choices'][0]['delta'])
@@ -37,34 +39,20 @@ We've implemented an `__anext__()` function in the streaming object returned. Th
 ### Usage
 Here's an example of using it with openai.
 ```python
-from litellm import completion
-import asyncio, os, traceback, time
+from litellm import acompletion
+import asyncio, os, traceback
 
-os.environ["OPENAI_API_KEY"] = "your-api-key"
-
-def logger_fn(model_call_object: dict):
-    print(f"LOGGER FUNCTION: {model_call_object}")
-
-
-user_message = "Hello, how are you?"
-messages = [{"content": user_message, "role": "user"}]
-
 async def completion_call():
     try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn
+        print("test acompletion + streaming")
+        response = await acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"content": "Hello, how are you?", "role": "user"}],
+            stream=True
         )
         print(f"response: {response}")
-        complete_response = ""
-        start_time = time.time()
-        # Change for loop to async for loop
         async for chunk in response:
-            chunk_time = time.time()
-            print(f"time since initial request: {chunk_time - start_time:.5f}")
-            print(chunk["choices"][0]["delta"])
-            complete_response += chunk["choices"][0]["delta"].get("content", "")
-        if complete_response == "":
-            raise Exception("Empty response received")
+            print(chunk)
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
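
The new example defines `completion_call()` but the hunk ends before any call site, and it drops the old example's accumulation of the streamed deltas into a single string. A minimal sketch of how one might run it end to end is below; the `asyncio.run` entry point, the accumulation loop, and the assumption that `OPENAI_API_KEY` is already set in the environment are illustrative additions, not part of this commit.

```python
# Illustrative sketch only, not part of the commit shown above.
# Runs the async + streaming example and rebuilds the full reply from the
# incremental deltas, mirroring the pattern the removed code used.
import asyncio, traceback
from litellm import acompletion

async def completion_call():
    try:
        response = await acompletion(
            model="gpt-3.5-turbo",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            stream=True
        )
        complete_response = ""
        async for chunk in response:
            delta = chunk["choices"][0]["delta"]
            print(delta)
            # the final chunk(s) may carry no "content", so default to ""
            complete_response += delta.get("content", "") or ""
        print(f"full response: {complete_response}")
    except Exception:
        print(f"error occurred: {traceback.format_exc()}")

# assumes OPENAI_API_KEY is set in the environment before running
asyncio.run(completion_call())
```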