From f1dbdb58bbb40e3bbe12051e8b0e3cce24a54498 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 Nov 2023 13:37:16 -0800 Subject: [PATCH] docs(simple_proxy.md): add cooldown to docs --- docs/my-website/docs/routing.md | 30 ++++++++++++++++++++++++---- docs/my-website/docs/simple_proxy.md | 5 ++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index a8f7b16c1c..03d95e9ef2 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -188,11 +188,33 @@ from litellm import Router model_list = [{...}] router = Router(model_list=model_list, - timeout=30) # timeout set to 30s + timeout=30) # raise timeout error if call takes > 30s print(response) ``` +### Cooldowns + +Set the limit for how many calls a model is allowed to fail in a minute, before being cooled down for a minute. + +```python +from litellm import Router + +model_list = [{...}] + +router = Router(model_list=model_list, + allowed_fails=1) # cooldown model if it fails > 1 call in a minute. + +user_message = "Hello, whats the weather in San Francisco??" +messages = [{"content": user_message, "role": "user"}] + +# normal call +response = router.completion(model="gpt-3.5-turbo", messages=messages) + +print(f"response: {response}") + +``` + ### Retries For both async + sync functions, we support retrying failed requests. @@ -206,9 +228,9 @@ Here's a quick look at how we can set `num_retries = 3`: ```python from litellm import Router -router = Router(model_list=model_list, - cache_responses=True, - timeout=30, +model_list = [{...}] + +router = Router(model_list=model_list, num_retries=3) user_message = "Hello, whats the weather in San Francisco??" 
diff --git a/docs/my-website/docs/simple_proxy.md b/docs/my-website/docs/simple_proxy.md index 718b712171..fd0884420b 100644 --- a/docs/my-website/docs/simple_proxy.md +++ b/docs/my-website/docs/simple_proxy.md @@ -853,12 +853,14 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \ ' ``` -### Fallbacks + Retries + Timeouts +### Fallbacks + Cooldowns + Retries + Timeouts If a call fails after num_retries, fall back to another model group. If the error is a context window exceeded error, fall back to a larger model group (if given). +[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py) + ```yaml model_list: - model_name: zephyr-beta @@ -887,6 +889,7 @@ litellm_settings: request_timeout: 10 # raise Timeout error if call takes longer than 10s fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails num_retries context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error + allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute. ``` ### Set Custom Prompt Templates