docs(simple_proxy.md): add cooldown to docs

commit f1dbdb58bb (parent 2686894823)
2 changed files with 30 additions and 5 deletions
@@ -188,11 +188,33 @@ from litellm import Router
 model_list = [{...}]
 
 router = Router(model_list=model_list,
-                timeout=30) # timeout set to 30s
+                timeout=30) # raise timeout error if call takes > 30s
 
 print(response)
 ```
 
+### Cooldowns
+
+Set the limit on how many calls a model is allowed to fail within a minute before it is cooled down for a minute.
+
+```python
+from litellm import Router
+
+model_list = [{...}]
+
+router = Router(model_list=model_list,
+                allowed_fails=1) # cooldown model if it fails > 1 call in a minute
+
+user_message = "Hello, whats the weather in San Francisco??"
+messages = [{"content": user_message, "role": "user"}]
+
+# normal call
+response = router.completion(model="gpt-3.5-turbo", messages=messages)
+
+print(f"response: {response}")
+```
+
 ### Retries
 
 For both async + sync functions, we support retrying failed requests.
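For reference, the cooldown added above composes with the timeout set in the same hunk. Below is a minimal sketch of a Router using both together; the filled-in `model_list` entry and `api_key` are illustrative placeholders, not part of the commit:

```python
from litellm import Router

# one deployment for the "gpt-3.5-turbo" model group (placeholder credentials)
model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "gpt-3.5-turbo",
        "api_key": "sk-...",  # placeholder - set a real key
    },
}]

router = Router(
    model_list=model_list,
    timeout=30,       # raise a timeout error if a call takes > 30s
    allowed_fails=1,  # cooldown the deployment if it fails > 1 call in a minute
)

messages = [{"content": "Hello, whats the weather in San Francisco??", "role": "user"}]
response = router.completion(model="gpt-3.5-turbo", messages=messages)
print(response)
```

With `allowed_fails=1`, a deployment that fails more than one call inside a minute is pulled out of rotation for a minute.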
@@ -206,9 +228,9 @@ Here's a quick look at how we can set `num_retries = 3`:
 ```python
 from litellm import Router
 
-router = Router(model_list=model_list,
-                cache_responses=True,
-                timeout=30,
+model_list = [{...}]
+
+router = Router(model_list=model_list,
                 num_retries=3)
 
 user_message = "Hello, whats the weather in San Francisco??"
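The Retries section states that retries work for both async and sync functions. As a companion to the sync snippet in the hunk above, here is a hedged async sketch; it assumes `router.acompletion` as the async counterpart to `router.completion`, and reuses a `model_list` like the one above:

```python
import asyncio
from litellm import Router

model_list = [{...}]  # same deployment list as in the docs above

router = Router(model_list=model_list,
                num_retries=3)  # retry a failed request up to 3 times

async def main():
    # async counterpart to router.completion; num_retries applies here too
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"content": "Hello, whats the weather in San Francisco??", "role": "user"}],
    )
    print(response)

asyncio.run(main())
```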
@@ -853,12 +853,14 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 '
 ```
 
-### Fallbacks + Retries + Timeouts
+### Fallbacks + Cooldowns + Retries + Timeouts
 
 If a call fails after num_retries, fall back to another model group.
 
+If the error is a context window exceeded error, fall back to a larger model group (if given).
+
 [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py)
 
 ```yaml
 model_list:
   - model_name: zephyr-beta
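The proxy configures this fallback behaviour through the yaml below; a hedged SDK-side sketch, assuming the yaml keys map one-to-one onto same-named Router kwargs:

```python
from litellm import Router

model_list = [{...}]  # deployments for zephyr-beta, gpt-3.5-turbo, and gpt-3.5-turbo-16k

router = Router(
    model_list=model_list,
    num_retries=3,
    # if zephyr-beta still fails after num_retries, retry the call on gpt-3.5-turbo
    fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo"]}],
    # on a context window exceeded error, go straight to the larger model group
    context_window_fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo-16k"]}],
)
```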
@@ -887,6 +889,7 @@ litellm_settings:
   request_timeout: 10 # raise Timeout error if call takes longer than 10s
   fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries
   context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error
+  allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute
 ```
 
 ### Set Custom Prompt Templates
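To pick up the new `allowed_fails` setting, restart the proxy with the edited config, e.g. `litellm --config /path/to/config.yaml` (the startup command documented elsewhere in simple_proxy.md).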