From 12dbdc4c15787a7d20596aee91917dd034abb1c6 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 24 Nov 2023 12:20:38 -0800
Subject: [PATCH] docs(simple_proxy.md): add tutorial for doing fallbacks +
 retries + timeouts on the proxy

---
 docs/my-website/docs/simple_proxy.md | 36 ++++++++++++++++++++++++++++
 litellm/__init__.py                  |  9 ++++---
 litellm/router.py                    | 12 +++++-----
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/docs/my-website/docs/simple_proxy.md b/docs/my-website/docs/simple_proxy.md
index a6c5d159d4..718b712171 100644
--- a/docs/my-website/docs/simple_proxy.md
+++ b/docs/my-website/docs/simple_proxy.md
@@ -853,6 +853,42 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 '
 ```
 
+### Fallbacks + Retries + Timeouts
+
+If a call fails after num_retries, fall back to another model group.
+
+If the error is a context window exceeded error, fall back to a larger model group (if given).
+
+```yaml
+model_list:
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8001
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8002
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8003
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_key: 
+  - model_name: gpt-3.5-turbo-16k
+    litellm_params:
+      model: gpt-3.5-turbo-16k
+      api_key: 
+
+litellm_settings:
+  num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta)
+  request_timeout: 10 # raise Timeout error if call takes longer than 10s
+  fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call fails after num_retries
+  context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k on a context window error
+```
+
 ### Set Custom Prompt Templates
 LiteLLM by default checks if a model has a [prompt template and applies it](./completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in it's tokenizer_config.json).
 However, you can also set a custom prompt template on your proxy in the `config.yaml`:
diff --git a/litellm/__init__.py b/litellm/__init__.py
index fe6ceede75..e7ad788a01 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -21,7 +21,6 @@ telemetry = True
 max_tokens = 256 # OpenAI Defaults
 drop_params = False
 retry = True
-request_timeout: Optional[float] = 6000
 api_key: Optional[str] = None
 openai_key: Optional[str] = None
 azure_key: Optional[str] = None
@@ -50,10 +49,14 @@ error_logs: Dict = {}
 add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
 client_session: Optional[httpx.Client] = None
 aclient_session: Optional[httpx.AsyncClient] = None
-model_fallbacks: Optional[List] = None
+model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
 model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
-num_retries: Optional[int] = None
 suppress_debug_info = False
+#### RELIABILITY ####
+request_timeout: Optional[float] = 6000
+num_retries: Optional[int] = None
+fallbacks: Optional[List] = None
+context_window_fallbacks: Optional[List] = None
 #############################################
 
 def get_model_cost_map(url: str):
diff --git a/litellm/router.py b/litellm/router.py
index 9516940e5c..6c0ddbcb8a 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -64,7 +64,7 @@ class Router:
                  redis_password: Optional[str] = None,
                  cache_responses: bool = False,
                  num_retries: int = 0,
-                 timeout: float = 600,
+                 timeout: Optional[float] = None,
                  default_litellm_params = {}, # default params for Router.chat.completion.create
                  set_verbose: bool = False,
                  fallbacks: List = [],
@@ -79,12 +79,12 @@ class Router:
             for m in model_list:
                 self.deployment_latency_map[m["litellm_params"]["model"]] = 0
 
-        self.num_retries = num_retries
-        self.set_verbose = set_verbose
-        self.timeout = timeout
+        self.num_retries = num_retries or litellm.num_retries
+        self.set_verbose = set_verbose
+        self.timeout = timeout or litellm.request_timeout
         self.routing_strategy = routing_strategy
-        self.fallbacks = fallbacks
-        self.context_window_fallbacks = context_window_fallbacks
+        self.fallbacks = fallbacks or litellm.fallbacks
+        self.context_window_fallbacks = context_window_fallbacks or litellm.context_window_fallbacks
 
         # make Router.chat.completions.create compatible for openai.chat.completions.create
         self.chat = litellm.Chat(params=default_litellm_params)
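
For reviewers, a minimal sketch (not part of the patch) of how the new module-level settings feed the `Router` defaults through the `or litellm.<setting>` wiring above. It assumes `litellm.Router` is constructed directly with a `model_list` shaped like the YAML config in the docs section; the proxy would do the equivalent after reading `litellm_settings`:

```python
import litellm
from litellm import Router

# What the proxy's `litellm_settings` block would set at module level
litellm.num_retries = 3
litellm.request_timeout = 10
litellm.fallbacks = [{"zephyr-beta": ["gpt-3.5-turbo"]}]
litellm.context_window_fallbacks = [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}]

# Same shape as the config.yaml `model_list` entries
model_list = [
    {
        "model_name": "zephyr-beta",
        "litellm_params": {
            "model": "huggingface/HuggingFaceH4/zephyr-7b-beta",
            "api_base": "http://0.0.0.0:8001",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo"},
    },
]

# num_retries / timeout / fallbacks are not passed explicitly, so Router.__init__
# falls back to the litellm.* values set above (the `or litellm.<setting>` changes)
router = Router(model_list=model_list)

response = router.completion(
    model="zephyr-beta",
    messages=[{"role": "user", "content": "Hello, world!"}],
)
print(response)
```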
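
To exercise the fallback config end-to-end, one option (assuming the proxy from the docs section is running on http://0.0.0.0:8000, the endpoint the existing curl examples use) is a plain OpenAI-format request against the model group; retries, timeouts, and fallbacks then apply server-side:

```python
import requests

# `zephyr-beta` is the model group name from config.yaml, not a single deployment
resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    json={
        "model": "zephyr-beta",
        "messages": [{"role": "user", "content": "what llm are you"}],
    },
)
print(resp.json())
```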