Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 10:14:26 +00:00
docs(simple_proxy.md): add tutorial for doing fallbacks + retries + timeouts on the proxy
This commit is contained in:
parent 3dcbf6197a · commit 12dbdc4c15

3 changed files with 48 additions and 9 deletions
simple_proxy.md

@@ -853,6 +853,42 @@ curl --location 'http://0.0.0.0:8000/chat/completions' \
 '
 ```
 
+### Fallbacks + Retries + Timeouts
+
+If a call fails after `num_retries`, fall back to another model group.
+
+If the error is a context window exceeded error, fall back to a larger model group (if given).
+
+```yaml
+model_list:
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8001
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8002
+  - model_name: zephyr-beta
+    litellm_params:
+      model: huggingface/HuggingFaceH4/zephyr-7b-beta
+      api_base: http://0.0.0.0:8003
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_key: <my-openai-key>
+  - model_name: gpt-3.5-turbo-16k
+    litellm_params:
+      model: gpt-3.5-turbo-16k
+      api_key: <my-openai-key>
+
+litellm_settings:
+  num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta)
+  request_timeout: 10 # raise Timeout error if call takes longer than 10s
+  fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo"]}] # fallback to gpt-3.5-turbo if call still fails after num_retries
+  context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k on context window error
+```
+
 ### Set Custom Prompt Templates
 
 LiteLLM by default checks if a model has a [prompt template and applies it](./completion/prompt_formatting.md) (e.g. if a huggingface model has a saved chat template in its tokenizer_config.json). However, you can also set a custom prompt template on your proxy in the `config.yaml`:
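Not part of the diff: a minimal client-side sketch of calling a proxy started from the config above. It assumes the proxy was launched with `litellm --config config.yaml` and is listening on port 8000 (matching the curl example in the surrounding docs), and that the `openai` Python SDK is used against the proxy's OpenAI-compatible endpoint; model names come from the config above.

```python
# Hypothetical usage sketch (not part of this commit): the proxy exposes an
# OpenAI-compatible /chat/completions endpoint, so the openai SDK can call it.
import openai

client = openai.OpenAI(
    api_key="anything",              # real provider keys live in the proxy config
    base_url="http://0.0.0.0:8000",  # where `litellm --config config.yaml` is listening
)

# Requests to "zephyr-beta" are routed across the three api_base deployments;
# after num_retries failures the proxy falls back to "gpt-3.5-turbo", and on a
# context window error it falls back to "gpt-3.5-turbo-16k".
response = client.chat.completions.create(
    model="zephyr-beta",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```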
litellm/__init__.py

@@ -21,7 +21,6 @@ telemetry = True
 max_tokens = 256 # OpenAI Defaults
 drop_params = False
 retry = True
-request_timeout: Optional[float] = 6000
 api_key: Optional[str] = None
 openai_key: Optional[str] = None
 azure_key: Optional[str] = None
@@ -50,10 +49,14 @@ error_logs: Dict = {}
 add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
 client_session: Optional[httpx.Client] = None
 aclient_session: Optional[httpx.AsyncClient] = None
-model_fallbacks: Optional[List] = None
+model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
 model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
-num_retries: Optional[int] = None
 suppress_debug_info = False
+#### RELIABILITY ####
+request_timeout: Optional[float] = 6000
+num_retries: Optional[int] = None
+fallbacks: Optional[List] = None
+context_window_fallbacks: Optional[List] = None
+#############################################
 
 def get_model_cost_map(url: str):
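The new module-level `RELIABILITY` block lets callers configure retries, timeouts, and fallbacks once instead of per-Router. A minimal sketch of how that could be used (not from the commit; model names, keys, and api_base values are placeholders mirroring the config.yaml above):

```python
# Hypothetical sketch: set reliability defaults once at module level; a Router
# built afterwards picks them up (see the Router diff below, e.g.
# `self.num_retries = num_retries or litellm.num_retries`).
import litellm
from litellm import Router

litellm.request_timeout = 10  # seconds before a call raises a Timeout error
litellm.num_retries = 3       # retries per model group before failing over
litellm.fallbacks = [{"zephyr-beta": ["gpt-3.5-turbo"]}]
litellm.context_window_fallbacks = [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}]

model_list = [  # placeholder deployments mirroring the config.yaml above
    {"model_name": "zephyr-beta",
     "litellm_params": {"model": "huggingface/HuggingFaceH4/zephyr-7b-beta",
                        "api_base": "http://0.0.0.0:8001"}},
    {"model_name": "gpt-3.5-turbo",
     "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "<my-openai-key>"}},
    {"model_name": "gpt-3.5-turbo-16k",
     "litellm_params": {"model": "gpt-3.5-turbo-16k", "api_key": "<my-openai-key>"}},
]

# No reliability kwargs passed here; the Router falls back to the
# litellm.* module settings configured above.
router = Router(model_list=model_list)
```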
litellm/router.py

@@ -64,7 +64,7 @@ class Router:
                  redis_password: Optional[str] = None,
                  cache_responses: bool = False,
                  num_retries: int = 0,
-                 timeout: float = 600,
+                 timeout: Optional[float] = None,
                  default_litellm_params = {}, # default params for Router.chat.completion.create
                  set_verbose: bool = False,
                  fallbacks: List = [],
@@ -79,12 +79,12 @@ class Router:
         for m in model_list:
             self.deployment_latency_map[m["litellm_params"]["model"]] = 0
 
-        self.num_retries = num_retries
+        self.num_retries = num_retries or litellm.num_retries
         self.set_verbose = set_verbose
-        self.timeout = timeout
+        self.timeout = timeout or litellm.request_timeout
         self.routing_strategy = routing_strategy
-        self.fallbacks = fallbacks
-        self.context_window_fallbacks = context_window_fallbacks
+        self.fallbacks = fallbacks or litellm.fallbacks
+        self.context_window_fallbacks = context_window_fallbacks or litellm.context_window_fallbacks
 
         # make Router.chat.completions.create compatible for openai.chat.completions.create
         self.chat = litellm.Chat(params=default_litellm_params)
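With the constructor change, explicitly passed kwargs still win over the module defaults because of the `x or litellm.x` pattern above. A hedged sketch of direct Router usage (placeholder values; `model_list` as in the earlier sketch):

```python
# Hypothetical sketch: explicit kwargs override the litellm.* module defaults.
from litellm import Router

router = Router(
    model_list=model_list,  # same placeholder deployment list as above
    num_retries=3,          # retry each model group 3 times
    timeout=10,             # if left as None, litellm.request_timeout is used
    fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo"]}],
    context_window_fallbacks=[{"zephyr-beta": ["gpt-3.5-turbo-16k"]}],
)

response = router.completion(
    model="zephyr-beta",    # routed across the zephyr-beta deployments
    messages=[{"role": "user", "content": "hello"}],
)
```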