(docs) router settings - on litellm config (#6037)

* add yaml with all router settings
* add docs for router settings
* docs for router settings and litellm settings
1 changed file with 86 additions and 64 deletions
Set model list, `api_base`, `api_key`, `temperature` & proxy server settings (`master_key`) on the config.yaml.

| Param Name | Description |
|----------------------|---------------------------------------------------------------|
| `model_list` | List of supported models on the server, with model-specific configs |
| `router_settings` | litellm Router settings, example `routing_strategy="least-busy"` [**see all**](#router-settings) |
| `litellm_settings` | litellm Module settings, example `litellm.drop_params=True`, `litellm.set_verbose=True`, `litellm.api_base`, `litellm.cache` [**see all**](#all-settings) |
| `general_settings` | Server settings, example setting `master_key: sk-my_special_key` |
| `environment_variables` | Environment variables, example `REDIS_HOST`, `REDIS_PORT` |
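For illustration, here is a minimal sketch of how these sections fit together in a single config.yaml; the model names, key, and Redis values are placeholders, not defaults.

```yaml
# Hypothetical config.yaml combining the sections above (all values are placeholders)
model_list:
  - model_name: gpt-4o                    # model alias exposed by the proxy
    litellm_params:
      model: openai/gpt-4o                # actual model string sent to the provider
      api_key: os.environ/OPENAI_API_KEY  # read the key from an environment variable

router_settings:
  routing_strategy: least-busy

litellm_settings:
  drop_params: true
  set_verbose: true

general_settings:
  master_key: sk-my_special_key           # proxy admin key

environment_variables:
  REDIS_HOST: "localhost"
  REDIS_PORT: "6379"
```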
Further down the config, the prompt-template fields on a `model_list` entry look like this:

```yaml
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```
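For context, a sketch of the full `model_list` entry this prompt-template fragment belongs to, assuming the template fields sit under `litellm_params`; the model alias, model string, and `api_base` are hypothetical placeholders.

```yaml
# Sketch only - model alias, model string, and api_base are placeholders
model_list:
  - model_name: mistral-7b                                    # hypothetical model alias
    litellm_params:                                           # params passed to litellm.completion()
      model: huggingface/mistralai/Mistral-7B-Instruct-v0.1   # placeholder model
      api_base: "<your-api-base>"                             # placeholder endpoint
      initial_prompt_value: "\n"
      roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      final_prompt_value: "\n"
      bos_token: " "
      eos_token: " "
      max_tokens: 4096
```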
## **All settings**

```yaml
environment_variables: {}

model_list:
  - model_name: string
    litellm_params: {}
    model_info:
      id: string
      mode: embedding
      input_cost_per_token: 0
      output_cost_per_token: 0
      max_tokens: 2048
      base_model: gpt-4-1106-preview
      additionalProp1: {}

litellm_settings:
  success_callback: ["langfuse"] # list of success callbacks
  failure_callback: ["sentry"] # list of failure callbacks
  callbacks: ["otel"] # list of callbacks - runs on success and failure
  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
  turn_off_message_logging: boolean # prevent the messages and responses from being logged to your callbacks; request metadata will still be logged
  redact_user_api_key_info: boolean # redact information about the user api key (hashed token, user_id, team id, etc.) from logs; currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging

  callback_settings:
    otel:
      message_logging: boolean # OTEL logging callback specific settings

general_settings:
  completion_model: string
  disable_spend_logs: boolean # turn off writing each transaction to the db
  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
  disable_reset_budget: boolean # turn off reset budget scheduled task
  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
  allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
  key_management_system: google_kms # either google_kms or azure_kms
  master_key: string
  database_url: string
  database_connection_pool_limit: 0 # default 100
  database_connection_timeout: 0 # default 60s
  custom_auth: string
  max_parallel_requests: 0 # the max parallel requests allowed per deployment
  global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy all up
  infer_model_from_keys: true
  background_health_checks: true
  health_check_interval: 300
  alerting: ["slack", "email"]
  alerting_threshold: 0
  use_client_credentials_pass_through_routes: boolean # use client credentials for all pass through routes like "/vertex-ai", "/bedrock". When this is True, Virtual Key auth will not be applied on these endpoints (https://docs.litellm.ai/docs/pass_through/vertex_ai)
```
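As a concrete illustration, a proxy that logs successes to Langfuse, keeps message content out of those logs, and runs background health checks might look like the sketch below; the master key, database URL, and interval values are placeholders, not recommendations.

```yaml
# Hypothetical values - adjust for your deployment
litellm_settings:
  success_callback: ["langfuse"]
  turn_off_message_logging: true  # log request metadata only, not message/response content

general_settings:
  master_key: sk-my_special_key
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"
  background_health_checks: true
  health_check_interval: 300      # seconds
  alerting: ["slack"]
```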
### Router Settings

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
  redis_host: <your-redis-host>            # string
  redis_password: <your-redis-password>    # string
  redis_port: <your-redis-port>            # string
  enable_pre_call_check: true              # bool - before a call is made, check that it fits within the model's context window
  allowed_fails: 3                         # cooldown a model if it fails more than `allowed_fails` calls in a minute
  cooldown_time: 30                        # (in seconds) how long to cooldown a model if fails/min > allowed_fails
  disable_cooldowns: True                  # bool - disable cooldowns for all models
  retry_policy: {                          # Dict[str, int]: retry policy for different types of exceptions
    "AuthenticationErrorRetries": 3,
    "TimeoutErrorRetries": 3,
    "RateLimitErrorRetries": 3,
    "ContentPolicyViolationErrorRetries": 4,
    "InternalServerErrorRetries": 4
  }
  allowed_fails_policy: {
    "BadRequestErrorAllowedFails": 1000,   # Allow 1000 BadRequestErrors before cooling down a deployment
    "AuthenticationErrorAllowedFails": 10, # int
    "TimeoutErrorAllowedFails": 12,        # int
    "RateLimitErrorAllowedFails": 10000,   # int
    "ContentPolicyViolationErrorAllowedFails": 15, # int
    "InternalServerErrorAllowedFails": 20  # int
  }
  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for content policy violations
  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: fallback model for all errors
```
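To make these settings concrete, here is a hedged sketch that load-balances two deployments behind one model alias with usage-based routing backed by Redis, falling back to a second alias on errors; the deployment names, environment variable names, and the fallback model are placeholders.

```yaml
# Sketch only - deployment names, env vars, and the fallback model are hypothetical
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2               # placeholder Azure deployment
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
  - model_name: my-fallback-model            # hypothetical fallback alias
    litellm_params:
      model: gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY

router_settings:
  routing_strategy: usage-based-routing-v2   # needs Redis so usage is shared across proxy instances
  redis_host: <your-redis-host>
  redis_password: <your-redis-password>
  redis_port: <your-redis-port>
  enable_pre_call_check: true
  allowed_fails: 3
  cooldown_time: 30
  fallbacks: [{"gpt-3.5-turbo": ["my-fallback-model"]}]
```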

## Extras