docs(routing.md): add timeouts per model
parent 9988a39169
commit 265f5ef6da
1 changed file with 31 additions and 0 deletions
@@ -302,6 +302,7 @@ asyncio.run(router_acompletion())

The timeout set on the router applies to the entire length of the call, and is passed down to the completion() call level as well.

**Global Timeouts**

```python
from litellm import Router
@@ -313,6 +314,36 @@ router = Router(model_list=model_list,
print(response)
```
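The hunk boundary above elides the middle of this example. For reference, a self-contained sketch of a global timeout, reusing the same Azure deployment config shown elsewhere on this page (the 30-second value is an assumption, not the elided line):

```python
import asyncio
import os
from litellm import Router

model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "azure/chatgpt-v-2",
        "api_key": os.getenv("AZURE_API_KEY"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "api_base": os.getenv("AZURE_API_BASE"),
    }
}]

# a router-wide timeout caps the entire call and is passed down to each
# underlying completion() call (the 30s value here is assumed)
router = Router(model_list=model_list, timeout=30)

async def router_acompletion():
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}]
    )
    print(response)

asyncio.run(router_acompletion())
```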
**Timeouts per model**

```python
from litellm import Router
import asyncio
import os

model_list = [{
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {
        "model": "azure/chatgpt-v-2",
        "api_key": os.getenv("AZURE_API_KEY"),
        "api_version": os.getenv("AZURE_API_VERSION"),
        "api_base": os.getenv("AZURE_API_BASE"),
        "timeout": 300,  # sets a 5-minute timeout
        "stream_timeout": 30  # sets a 30s timeout for streaming calls
    }
}]

# init router
router = Router(model_list=model_list, routing_strategy="least-busy")

async def router_acompletion():
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}]
    )
    print(response)
    return response

asyncio.run(router_acompletion())
```
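Since the config above sets both `timeout` and `stream_timeout`, here is a quick usage sketch of a streaming call, which the example above describes as governed by `stream_timeout` (30s) rather than the general `timeout`. It reuses the `router` defined just above; the prompt and iteration pattern are illustrative:

```python
async def stream_example():
    # stream=True makes this call subject to the per-model "stream_timeout"
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        stream=True
    )
    # streamed responses arrive as async-iterable chunks
    async for chunk in response:
        print(chunk)

asyncio.run(stream_example())
```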
### Cooldowns

Set the limit for how many calls a model is allowed to fail within a minute before it is cooled down for a minute.
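A minimal sketch of this, assuming the Router exposes an `allowed_fails` parameter for the failure threshold (the parameter name and value are assumptions; check the Router signature in your litellm version):

```python
from litellm import Router

# assumed parameter: allowed_fails — how many failures per minute a
# deployment may accumulate before being cooled down for a minute
router = Router(model_list=model_list, allowed_fails=1)
```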